1 |
#' Reformat Values |
|
2 |
#' @param obj (`character`, `factor` or `list of data.frame`) to reformat. |
|
3 |
#' @param format (`rule`) or (`list`) of `rule` depending on the class of obj. |
|
4 |
#' @param ... for compatibility between methods and pass additional special mapping to transform rules. |
|
5 |
#' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor. |
|
6 |
#' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are |
|
7 |
#' dropped. If `NULL`, the argument will be taken from the `to_NA`attribute of the rule. |
|
8 |
#' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop`attribute of |
|
9 |
#' the rule. |
|
10 |
#' * `.na_last` (`flag`) whether the level replacing `NA` should be last. |
|
11 |
#' @param verbose (`flag`) whether to print the format. |
|
12 |
#' @returns (`character`, `factor` or `list of data.frame`) with remapped values. |
|
13 |
#' |
|
14 |
#' @export |
|
15 |
#' @note When the rule is empty rule or when values subject to reformatting are absent from the object, no error is |
|
16 |
#' raised. The conversion to factor if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels |
|
17 |
#' declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a |
|
18 |
#' rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to |
|
19 |
#' `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the |
|
20 |
#' ones defined during rule creation. |
|
21 |
#' |
|
22 |
#' @rdname reformat |
|
23 |
#' |
|
24 |
reformat <- function(obj, ...) { |
|
25 | 57x |
UseMethod("reformat") |
26 |
} |
|
27 | ||
28 |
#' @export |
|
29 |
#' @rdname reformat |
|
30 |
reformat.default <- function(obj, format, ...) { |
|
31 | 1x |
rlang::warn(paste0("Not implemented for class: ", toString(class(obj)), "! Returning original object.")) |
32 | 1x |
return(obj) |
33 |
} |
|
34 | ||
35 |
#' @export |
|
36 |
#' @rdname reformat |
|
37 |
#' |
|
38 |
#' @examples |
|
39 |
#' # Reformatting of character. |
|
40 |
#' obj <- c("a", "b", "x", NA, "") |
|
41 |
#' attr(obj, "label") <- "my label" |
|
42 |
#' format <- rule("A" = "a", "NN" = NA) |
|
43 |
#' |
|
44 |
#' reformat(obj, format) |
|
45 |
#' reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL) |
|
46 |
#' |
|
47 |
reformat.character <- function(obj, format, ..., verbose = FALSE) { |
|
48 | 21x |
checkmate::assert_class(format, "rule") |
49 | 21x |
checkmate::assert_flag(as.logical(verbose)) |
50 | ||
51 |
# Give priority to argument defined in reformat. |
|
52 | 21x |
format <- do.call(rule, modifyList(as.list(format), list(...), keep.null = TRUE)) |
53 | 21x |
if (verbose) { |
54 | 1x |
print(format) |
55 |
} |
|
56 | ||
57 | 21x |
if (attr(format, ".string_as_fct")) { |
58 |
# Keep attributes. |
|
59 | 12x |
att <- attributes(obj) |
60 | 12x |
obj_fact <- as.factor(obj) |
61 | 12x |
supp_att_name <- setdiff(names(att), attributes(obj_fact)) |
62 | 12x |
supp_att <- att[supp_att_name] |
63 | 12x |
attributes(obj_fact) <- c(attributes(obj_fact), supp_att) |
64 | ||
65 | 12x |
reformat(obj_fact, format) |
66 |
} else { |
|
67 | 9x |
value_match <- unlist(format) |
68 | 9x |
m <- match(obj, value_match) |
69 | 9x |
obj[!is.na(m)] <- names(format)[m[!is.na(m)]] |
70 | 9x |
val_to_NA <- attr(format, ".to_NA") |
71 | 9x |
if (!is.null(val_to_NA)) { |
72 | 5x |
obj[obj %in% val_to_NA] <- NA_character_ |
73 |
} |
|
74 | ||
75 | 9x |
obj |
76 |
} |
|
77 |
} |
|
78 | ||
79 |
#' @export |
|
80 |
#' @rdname reformat |
|
81 |
#' |
|
82 |
#' @examples |
|
83 |
#' # Reformatting of factor. |
|
84 |
#' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z")) |
|
85 |
#' attr(obj, "label") <- "my label" |
|
86 |
#' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P") |
|
87 |
#' |
|
88 |
#' reformat(obj, format) |
|
89 |
#' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE) |
|
90 |
#' |
|
91 |
reformat.factor <- function(obj, format, ..., verbose = FALSE) { |
|
92 | 30x |
checkmate::assert_class(format, "rule") |
93 | 30x |
checkmate::assert_flag(verbose) |
94 | ||
95 | 30x |
format <- do.call(rule, modifyList(as.list(format), list(...), keep.null = TRUE)) |
96 | 30x |
if (verbose) { |
97 | 1x |
print(format) |
98 |
} |
|
99 | ||
100 | 30x |
any_na <- anyNA(obj) |
101 | 30x |
if (any(is.na(format)) && any_na) { |
102 | 21x |
obj <- forcats::fct_na_value_to_level(obj) |
103 |
} |
|
104 | ||
105 | 30x |
absent_format <- format[!format %in% levels(obj)] |
106 | 30x |
sel_format <- format[format %in% levels(obj)] |
107 | 30x |
obj <- forcats::fct_recode(obj, !!!sel_format) |
108 | 30x |
obj <- forcats::fct_expand(obj, unique(names(absent_format))) |
109 | 30x |
obj <- forcats::fct_relevel(obj, unique(names(format))) |
110 | ||
111 | 30x |
if (any(is.na(format)) && attr(format, ".na_last")) { |
112 | 16x |
na_lvl <- names(format)[is.na(format)] |
113 | 16x |
obj <- forcats::fct_relevel(obj, na_lvl, after = Inf) |
114 |
} |
|
115 | ||
116 | 30x |
drop_lvl <- attr(format, ".drop") |
117 | 30x |
if (drop_lvl) { |
118 | 2x |
obj <- forcats::fct_drop(obj) |
119 |
} |
|
120 | ||
121 |
# Levels converted to NA are dropped. |
|
122 | 30x |
val_to_NA <- attr(format, ".to_NA") |
123 | 30x |
if (!is.null(val_to_NA)) { |
124 | 30x |
obj <- forcats::fct_na_level_to_value(obj, val_to_NA) |
125 |
} |
|
126 | ||
127 | 30x |
obj |
128 |
} |
|
129 | ||
130 |
#' @export |
|
131 |
#' @rdname reformat |
|
132 |
#' |
|
133 |
#' @note the variables listed under the `all_dataset` keyword will be reformatted with the corresponding rule in every |
|
134 |
#' data set except where another rule is specified for the same variable under a specific data set name. |
|
135 |
#' |
|
136 |
#' @examples |
|
137 |
#' # Reformatting of list of data.frame. |
|
138 |
#' df1 <- data.frame( |
|
139 |
#' var1 = c("a", "b", NA), |
|
140 |
#' var2 = factor(c("F1", "F2", NA)) |
|
141 |
#' ) |
|
142 |
#' |
|
143 |
#' df2 <- data.frame( |
|
144 |
#' var1 = c("x", NA, "y"), |
|
145 |
#' var2 = factor(c("F11", NA, "F22")) |
|
146 |
#' ) |
|
147 |
#' |
|
148 |
#' db <- list(df1 = df1, df2 = df2) |
|
149 |
#' |
|
150 |
#' format <- list( |
|
151 |
#' df1 = list( |
|
152 |
#' var1 = rule("X" = "x", "N" = NA, .to_NA = "b") |
|
153 |
#' ), |
|
154 |
#' df2 = list( |
|
155 |
#' var2 = rule("f11" = "F11", "NN" = NA) |
|
156 |
#' ), |
|
157 |
#' all_datasets = list( |
|
158 |
#' var1 = rule("xx" = "x", "aa" = "a") |
|
159 |
#' ) |
|
160 |
#' ) |
|
161 |
#' |
|
162 |
#' reformat(db, format) |
|
163 |
reformat.list <- function(obj, |
|
164 |
format, |
|
165 |
..., |
|
166 |
verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE)) { |
|
167 | 5x |
checkmate::assert_list(obj, types = c("data.frame", "tibble")) |
168 | 5x |
checkmate::assert_named(obj) |
169 | 5x |
checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE) |
170 | 5x |
verbose <- as.logical(verbose) |
171 | 5x |
checkmate::assert_flag(verbose) |
172 | ||
173 | 5x |
if (length(format) == 0) { |
174 | 2x |
return(obj) |
175 |
} |
|
176 | ||
177 | 3x |
assert_valid_format(format) |
178 | ||
179 | 3x |
ls_datasets <- names(obj) |
180 | 3x |
format <- h_expand_all_datasets(format, ls_datasets) |
181 | ||
182 | 3x |
if (verbose) { |
183 | 1x |
for (tb in names(format)) { |
184 | 2x |
for (cl in names(format[[tb]])) { |
185 | 2x |
cat(sprintf("\nData frame `%s`, column `%s`:\n", tb, cl)) |
186 | 2x |
print(format[[tb]][[cl]]) |
187 |
} |
|
188 |
} |
|
189 | 1x |
cat("\n") |
190 |
} |
|
191 | ||
192 | 3x |
for (tab in ls_datasets) { |
193 | 7x |
local_map <- format[[tab]] |
194 | 7x |
local_map <- local_map[names(local_map) %in% names(obj[[tab]])] |
195 | ||
196 | 7x |
obj[[tab]][names(local_map)] <- mapply( |
197 | 7x |
function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...), |
198 | 7x |
local_map, |
199 | 7x |
names(local_map), |
200 | 7x |
SIMPLIFY = FALSE |
201 |
) |
|
202 |
} |
|
203 | ||
204 | 3x |
obj |
205 |
} |
|
206 | ||
207 |
#' Propagate the rules for all datasets |
|
208 |
#' |
|
209 |
#' @inheritParams reformat |
|
210 |
#' @param ls_datasets (`character`) the name of all datasets in the object to reformat. |
|
211 |
#' @returns a nested `list` attributing a rule to be applied to specific variables of specific datasets. |
|
212 |
#' |
|
213 |
#' @details the rules described under `all_datasets` are propagated to all data sets for the corresponding variables |
|
214 |
#' except in data sets where a rule is already attributed to the same variable. |
|
215 |
#' |
|
216 |
#' @keywords internal |
|
217 |
h_expand_all_datasets <- function(format_list, ls_datasets = NULL) { |
|
218 | 6x |
assert_valid_list_format(list(f = format_list)) |
219 | 6x |
checkmate::assert_character(ls_datasets, null.ok = TRUE) |
220 | ||
221 | 6x |
spec_datasets <- format_list[setdiff(names(format_list), "all_datasets")] |
222 | ||
223 | 6x |
if (!is.null(ls_datasets)) { |
224 | 5x |
to_all_datasets <- list() |
225 | 5x |
to_all_datasets[ls_datasets] <- format_list["all_datasets"] |
226 | 5x |
to_all_datasets <- base::Filter(function(x) !is.null(x), to_all_datasets) |
227 | ||
228 | 5x |
modifyList(to_all_datasets, spec_datasets) |
229 |
} else { |
|
230 | 1x |
spec_datasets |
231 |
} |
|
232 |
} |
1 |
#' Assert Nested List can be used as Format Argument in Reformat. |
|
2 |
#' |
|
3 |
#' @param object (`list`) to assert. |
|
4 |
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled. |
|
5 |
#' |
|
6 |
#' @export |
|
7 |
#' @examples |
|
8 |
#' format <- list( |
|
9 |
#' df1 = list( |
|
10 |
#' var1 = rule("X" = "x", "N" = c(NA, "")) |
|
11 |
#' ), |
|
12 |
#' df2 = list( |
|
13 |
#' var1 = rule(), |
|
14 |
#' var2 = rule("f11" = "F11", "NN" = NA) |
|
15 |
#' ), |
|
16 |
#' df3 = list() |
|
17 |
#' ) |
|
18 |
#' |
|
19 |
#' assert_valid_format(format) |
|
20 |
assert_valid_format <- function(object) { |
|
21 | 5x |
coll <- checkmate::makeAssertCollection() |
22 | ||
23 |
# Check object. |
|
24 | 5x |
checkmate::assert_list(object, names = "unique", types = "list", add = coll) |
25 | ||
26 |
# Check table level. |
|
27 | 5x |
mapply( |
28 | 5x |
function(x, xtable) { |
29 | 12x |
checkmate::assert_list( |
30 | 12x |
x, |
31 | 12x |
names = "unique", |
32 | 12x |
types = "rule", |
33 | 12x |
any.missing = FALSE, |
34 | 12x |
.var.name = paste0("[", xtable, "]"), |
35 | 12x |
add = coll |
36 |
) |
|
37 |
}, |
|
38 | 5x |
object, |
39 | 5x |
names(object) |
40 |
) |
|
41 | ||
42 | 5x |
checkmate::reportAssertions(coll) |
43 |
} |
|
44 | ||
45 |
#' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat. |
|
46 |
#' |
|
47 |
#' @param object (`list`) to assert. |
|
48 |
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled. |
|
49 |
#' |
|
50 |
#' @export |
|
51 |
#' @examples |
|
52 |
#' format <- list( |
|
53 |
#' df1 = list( |
|
54 |
#' var1 = list("X" = "x", "N" = c(NA, "")) |
|
55 |
#' ), |
|
56 |
#' df2 = list( |
|
57 |
#' var1 = list(), |
|
58 |
#' var2 = list("f11" = "F11", "NN" = NA) |
|
59 |
#' ), |
|
60 |
#' df3 = list() |
|
61 |
#' ) |
|
62 |
#' |
|
63 |
#' assert_valid_list_format(format) |
|
64 |
assert_valid_list_format <- function(object) { |
|
65 | 8x |
coll <- checkmate::makeAssertCollection() |
66 | ||
67 |
# Check object. |
|
68 | 8x |
checkmate::assert_list(object, names = "unique", types = "list", add = coll) |
69 | ||
70 |
# Check table level. |
|
71 | 8x |
mapply( |
72 | 8x |
function(x, xtable) { |
73 | 10x |
checkmate::assert_list( |
74 | 10x |
x, |
75 | 10x |
names = "unique", |
76 | 10x |
types = "list", |
77 | 10x |
any.missing = FALSE, |
78 | 10x |
.var.name = paste0("[", xtable, "]"), |
79 | 10x |
add = coll |
80 |
) |
|
81 |
}, |
|
82 | 8x |
object, |
83 | 8x |
names(object) |
84 |
) |
|
85 | ||
86 |
# Check variable level. |
|
87 | 8x |
mapply( |
88 | 8x |
function(x, xtable) { |
89 | 10x |
xvar <- names(x) |
90 | 10x |
mapply( |
91 | 10x |
function(x, xvar) { |
92 | 20x |
checkmate::assert_list( |
93 | 20x |
x, |
94 | 20x |
names = "unique", |
95 | 20x |
types = c("character", "numeric", "logical"), |
96 | 20x |
.var.name = paste0("[", xtable, ".", xvar, "]"), |
97 | 20x |
add = coll |
98 |
) |
|
99 |
}, |
|
100 | 10x |
x, |
101 | 10x |
xvar |
102 |
) |
|
103 |
}, |
|
104 | 8x |
object, |
105 | 8x |
names(object) |
106 |
) |
|
107 | ||
108 | 8x |
checkmate::reportAssertions(coll) |
109 |
} |
|
110 | ||
111 |
# assert_all_tablenames ---- |
|
112 | ||
113 |
#' Assert that all names are among names of a `list` of `data.frame`. |
|
114 |
#' |
|
115 |
#' @param db (`list` of `data.frame`) input to check for the presence of tables. |
|
116 |
#' @param tab (`character`) the names of the tables to be checked. |
|
117 |
#' @param null_ok (`flag`) can `x` be NULL. |
|
118 |
#' @param qualifier (`string`) to be returned if the check fails. |
|
119 |
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled. |
|
120 |
#' |
|
121 |
#' @export |
|
122 |
#' |
|
123 |
#' @examples |
|
124 |
#' lsd <- list( |
|
125 |
#' mtcars = mtcars, |
|
126 |
#' iris = iris |
|
127 |
#' ) |
|
128 |
#' assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:") |
|
129 |
assert_all_tablenames <- function(db, tab, null_ok = TRUE, qualifier = NULL) { |
|
130 | 23x |
checkmate::assert_list(db, types = "data.frame", names = "unique") |
131 | 23x |
checkmate::assert_character(tab, null.ok = null_ok) |
132 | 23x |
checkmate::assert_string(qualifier, null.ok = TRUE) |
133 | ||
134 | 23x |
diff <- setdiff(tab, names(db)) |
135 | ||
136 | 23x |
if (length(diff) == 0) { |
137 | 20x |
invisible(NULL) |
138 |
} else { |
|
139 | 3x |
stop( |
140 | 3x |
paste(qualifier, "Expected table names:", toString(diff), "not in", deparse(substitute(db))) |
141 |
) |
|
142 |
} |
|
143 |
} |
|
144 | ||
145 |
# assert_one_tablenames ---- |
|
146 | ||
147 |
#' Assert that at least one name is among table names of a `list` of `data.frame`. |
|
148 |
#' |
|
149 |
#' @param db (`list` of `data.frame`) input to check for the presence or tables. |
|
150 |
#' @param tab (`character`) the names of the tables to be checked. |
|
151 |
#' @param null_ok (`flag`) can `x` be NULL. |
|
152 |
#' @param qualifier (`string`) to be returned if the check fails. |
|
153 |
#' @returns invisible `TRUE` or an error message if the criteria are not fulfilled. |
|
154 |
#' |
|
155 |
#' @keywords internal |
|
156 |
assert_one_tablenames <- function(db, tab, null_ok = TRUE, qualifier = NULL) { |
|
157 | 4x |
checkmate::assert_list(db, types = "data.frame", names = "unique") |
158 | 4x |
checkmate::assert_character(tab, null.ok = null_ok) |
159 | 4x |
checkmate::assert_string(qualifier, null.ok = TRUE) |
160 | ||
161 | 4x |
diff <- setdiff(tab, names(db)) |
162 | ||
163 | 4x |
common <- intersect(tab, names(db)) |
164 | ||
165 | 4x |
if (length(common) > 0) { |
166 | 2x |
invisible(NULL) |
167 |
} else { |
|
168 | 2x |
stop( |
169 | 2x |
paste(qualifier, "At least one of:", toString(tab), "is expected to be a table name of", deparse(substitute(db))) |
170 |
) |
|
171 |
} |
|
172 |
} |
1 |
#' Safe transformer |
|
2 |
#' |
|
3 |
#' @param text (`string`) to be substituted. |
|
4 |
#' @param envir (`environment`) containing key-value pairs describing the substitution to perform. |
|
5 |
#' @returns `string` with substituted placeholders. |
|
6 |
#' |
|
7 |
#' @details Obtain content in global environment by default. |
|
8 |
#' If not found, use the environment here. The function first looks for an exact match. If not found, it searches for a |
|
9 |
#' match in lower case then apply to the result the same case as the original value. |
|
10 |
#' |
|
11 |
#' @keywords internal |
|
12 |
safe_transformer <- function(text, envir) { |
|
13 | 14x |
if (exists(text, envir = envir, inherits = FALSE, mode = "character")) { |
14 | 4x |
res <- get(text, envir = envir, mode = "character") |
15 | 4x |
return(toString(res)) |
16 |
} |
|
17 | ||
18 | 10x |
text_lower <- tolower(text) |
19 | 10x |
res <- if (exists(text_lower, envir = envir, inherits = FALSE, mode = "character")) { |
20 | 6x |
get(text_lower, envir = envir, mode = "character") |
21 |
} else { |
|
22 | 4x |
text |
23 |
} |
|
24 | ||
25 | 10x |
if (is.character(res)) { |
26 | 10x |
if (identical(text, tolower(text))) { |
27 | 2x |
res <- tolower(res) |
28 | 8x |
} else if (identical(text, toupper(text))) { |
29 | 3x |
res <- toupper(res) |
30 | 5x |
} else if (identical(text, stringr::str_to_title(text))) { |
31 | 5x |
res <- stringr::str_to_title(res) |
32 |
} |
|
33 |
} |
|
34 | ||
35 | 10x |
return(toString(res)) |
36 |
} |
|
37 | ||
38 |
#' Render whiskers safely |
|
39 |
#' @param x (`character`) input to be rendered safely. |
|
40 |
#' @returns `character` with substituted placeholders. |
|
41 |
#' |
|
42 |
#' @note The strings enclosed in `{}` are substituted using the key-values pairs set with `add_whiskers`. |
|
43 |
#' |
|
44 |
#' @export |
|
45 |
#' @examples |
|
46 |
#' render_safe("Name of {Patient_label}") |
|
47 |
render_safe <- function(x) { |
|
48 | 11x |
checkmate::assert_character(x, null.ok = TRUE) |
49 | 11x |
if (is.null(x)) { |
50 | ! |
return(NULL) |
51 |
} |
|
52 | 11x |
ret <- lapply( |
53 | 11x |
x, |
54 | 11x |
glue::glue, |
55 | 11x |
.transformer = safe_transformer, |
56 | 11x |
.envir = whisker_env, |
57 | 11x |
.null = "NULL", |
58 | 11x |
.open = "{", |
59 | 11x |
.close = "}" |
60 |
) |
|
61 | 11x |
ret <- vapply(ret, `[[`, i = 1L, FUN.VALUE = "") |
62 | 11x |
setNames(ret, names(x)) |
63 |
} |
|
64 |
#' Add whisker values |
|
65 |
#' @param x Named (`character`) input. |
|
66 |
#' @returns invisible `NULL`. Assign the key-value pair provided as argument in the whisker environment. |
|
67 |
#' |
|
68 |
#' @details The names of the character gives the string to be replaced and the value gives the new string. |
|
69 |
#' |
|
70 |
#' @export |
|
71 |
#' @examples |
|
72 |
#' my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2") |
|
73 |
#' add_whisker(my_whiskers) |
|
74 |
add_whisker <- function(x) { |
|
75 | 5x |
checkmate::assert_character(x, names = "unique", any.missing = FALSE) |
76 | 4x |
lapply( |
77 | 4x |
names(x), |
78 | 4x |
function(i) { |
79 | 5x |
assign(i, x[i], envir = whisker_env) |
80 |
} |
|
81 |
) |
|
82 | 4x |
invisible() |
83 |
} |
|
84 | ||
85 |
#' Remove whisker values |
|
86 |
#' @param x Named (`character`) input. |
|
87 |
#' @returns invisible `NULL`. Removes `x` from the whisker environment. |
|
88 |
#' @export |
|
89 |
remove_whisker <- function(x) { |
|
90 | 8x |
checkmate::assert_character(x, any.missing = FALSE) |
91 | 8x |
rm(list = x, envir = whisker_env) |
92 |
} |
|
93 | ||
94 |
#' Show Whisker Values |
|
95 |
#' @returns invisible `NULL`. Prints the values stored in the whisker environment. |
|
96 |
#' @export |
|
97 |
#' @examples |
|
98 |
#' show_whisker() |
|
99 |
show_whisker <- function() { |
|
100 | 6x |
l <- ls(envir = whisker_env) |
101 | 6x |
val <- lapply( |
102 | 6x |
l, |
103 | 6x |
function(x) { |
104 | 10x |
if (exists(x, envir = whisker_env, mode = "character")) { |
105 | 9x |
setNames( |
106 | 9x |
toString(get(x, envir = whisker_env, mode = "character")), |
107 | 9x |
x |
108 |
) |
|
109 |
} |
|
110 |
} |
|
111 |
) |
|
112 | ||
113 | 6x |
lapply(val, function(x) cat(sprintf("%s --> %s\n", names(x), x))) |
114 | 6x |
invisible() |
115 |
} |
1 |
#' Create rule based on mappings |
|
2 |
#' @param ... Mapping pairs, the argument name is the transformed while |
|
3 |
#' its values are original values. |
|
4 |
#' @param .lst (`list`) of mapping. |
|
5 |
#' @param .string_as_fct (`flag`) whether to convert characters to factors. |
|
6 |
#' @param .na_last (`flag`) whether the level replacing `NA` should be last. |
|
7 |
#' @param .drop (`flag`) whether to drop empty levels. |
|
8 |
#' @param .to_NA (`character`) values that should be converted to `NA`. Set to `NULL` if nothing should be converted to |
|
9 |
#' `NA`. |
|
10 |
#' @returns a `rule` object. |
|
11 |
#' |
|
12 |
#' @note Conversion to `NA` is the last step of the remapping process. |
|
13 |
#' |
|
14 |
#' @export |
|
15 |
#' @examples |
|
16 |
#' rule("X" = "x", "Y" = c("y", "z")) |
|
17 |
#' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE) |
|
18 |
#' |
|
19 |
rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "") { |
|
20 | 135x |
checkmate::assert_flag(.string_as_fct) |
21 | 135x |
checkmate::assert_flag(.na_last) |
22 | 135x |
checkmate::assert_flag(.drop) |
23 | 135x |
checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE) |
24 | ||
25 | 135x |
.lst[is.na(.lst)] <- NA_character_ |
26 | 135x |
if (!checkmate::test_list(.lst, types = c("character"))) { |
27 | 1x |
rlang::abort("Value mapping may only contain the type: {character}") |
28 |
} |
|
29 | 134x |
vals <- as.character(unlist(.lst, use.names = FALSE)) |
30 | 134x |
checkmate::assert_character(vals, unique = TRUE) |
31 | 132x |
nms <- unlist(lapply(seq_len(length(.lst)), function(x) { |
32 | 263x |
rep(names(.lst)[x], length(.lst[[x]])) |
33 |
})) |
|
34 | ||
35 | 132x |
res <- structure( |
36 | 132x |
setNames(vals, nms), |
37 | 132x |
class = c("rule", "character"), |
38 | 132x |
.string_as_fct = .string_as_fct, |
39 | 132x |
.na_last = .na_last, |
40 | 132x |
.drop = .drop, |
41 | 132x |
.to_NA = .to_NA |
42 |
) |
|
43 | ||
44 | 132x |
res |
45 |
} |
|
46 | ||
47 |
#' @export |
|
48 |
#' |
|
49 |
print.rule <- function(x, ...) { |
|
50 | 12x |
cat("Mapping of:\n") |
51 | 12x |
nms <- unique(names(x)) |
52 | 12x |
if (length(x) == 0) { |
53 | 3x |
cat("Empty mapping.\n") |
54 |
} else { |
|
55 | 9x |
for (i in nms) { |
56 | 26x |
ori_nms <- unlist(x[names(x) %in% i]) |
57 | 26x |
ori_nms <- ifelse(is.na(ori_nms), "<NA>", stringr::str_c("\"", ori_nms, "\"")) |
58 | 26x |
ori_nms <- toString(ori_nms) |
59 | 26x |
cat(i, " <- ", ori_nms, "\n") |
60 |
} |
|
61 |
} |
|
62 | 12x |
.to_NA <- attr(x, ".to_NA") |
63 | 12x |
if (!is.null(.to_NA)) { |
64 | 12x |
cat("Convert to <NA>:", toString(stringr::str_c("\"", .to_NA, "\"")), "\n") |
65 |
} |
|
66 | 12x |
cat("Convert to factor:", attr(x, ".string_as_fct"), "\n") |
67 | 12x |
cat("Drop unused level:", attr(x, ".drop"), "\n") |
68 | 12x |
cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n") |
69 |
} |
|
70 | ||
71 |
#' Convert nested list into list of `rule` |
|
72 |
#' @param obj (`nested list`) to convert into list of rules. |
|
73 |
#' @returns a `list` of `rule` objects. |
|
74 |
#' @export |
|
75 |
#' @examples |
|
76 |
#' obj <- list( |
|
77 |
#' rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"), |
|
78 |
#' rule2 = list(Missing = c(NA, "")), |
|
79 |
#' rule3 = list(Missing = c(NA, ""), .drop = TRUE), |
|
80 |
#' rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy") |
|
81 |
#' ) |
|
82 |
#' list2rules(obj) |
|
83 |
#' |
|
84 |
list2rules <- function(obj) { |
|
85 | 3x |
coll <- checkmate::makeAssertCollection() |
86 | 3x |
checkmate::assert_list(obj, types = "list", add = coll) |
87 | 3x |
checkmate::assert_names(names(obj), type = "unique", add = coll) |
88 | 3x |
checkmate::reportAssertions(coll) |
89 | ||
90 | 2x |
lapply(obj, function(x) { |
91 | 6x |
do.call("rule", x) |
92 |
}) |
|
93 |
} |
|
94 | ||
95 |
#' Convert Rule to List |
|
96 |
#' @param x (`rule`) to convert. |
|
97 |
#' @param ... not used. |
|
98 |
#' @returns an object of class `list`. |
|
99 |
#' |
|
100 |
#' @export |
|
101 |
#' @examples |
|
102 |
#' x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w")) |
|
103 |
#' as.list(x) |
|
104 |
as.list.rule <- function(x, ...) { |
|
105 | 61x |
nms <- names(x) |
106 | 61x |
unames <- unique(nms) |
107 | 61x |
res <- lapply(unames, function(i) { |
108 | 119x |
unname(x[nms == i]) |
109 |
}) |
|
110 | ||
111 | ||
112 | 61x |
att <- attributes(x) |
113 | 61x |
arg <- att[!names(att) %in% c("names", "class")] |
114 | ||
115 | 61x |
res <- c(res, unname(arg)) |
116 | 61x |
unames <- c(unames, names(arg)) |
117 | ||
118 | 61x |
r_list <- setNames(res, unames) |
119 | ||
120 |
# Explicitly declare .to_NA value, even if NULL. |
|
121 | 61x |
.to_NA <- r_list[[".to_NA"]] |
122 | 61x |
if (is.null(.to_NA)) { |
123 | 3x |
r_list[".to_NA"] <- list(NULL) |
124 |
} |
|
125 | ||
126 | 61x |
r_list |
127 |
} |
|
128 | ||
129 |
#' Combine Two Rules |
|
130 |
#' |
|
131 |
#' @param x (`rule`) to modify. |
|
132 |
#' @param y (`rule`) rule whose mapping will take precedence over the ones described in `x`. |
|
133 |
#' @param ... not used. |
|
134 |
#' |
|
135 |
#' @note The order of the mappings in the resulting rule corresponds to the order of the mappings in `x` followed by the |
|
136 |
#' mappings that are only present in `y`. |
|
137 |
#' |
|
138 |
#' @returns a `rule`. |
|
139 |
#' @export |
|
140 |
#' @examples |
|
141 |
#' r1 <- rule( |
|
142 |
#' "first" = c("from ori rule", "FROM ORI RULE"), |
|
143 |
#' "last" = c(NA, "last"), |
|
144 |
#' .to_NA = "X", |
|
145 |
#' .drop = TRUE |
|
146 |
#' ) |
|
147 |
#' r2 <- rule( |
|
148 |
#' "first" = c("F", "f"), |
|
149 |
#' "second" = c("S", "s"), |
|
150 |
#' "third" = c("T", "t"), |
|
151 |
#' .to_NA = "something" |
|
152 |
#' ) |
|
153 |
#' combine_rules(r1, r2) |
|
154 |
combine_rules <- function(x, y, ...) { |
|
155 | 6x |
checkmate::assert_class(x, "rule", null.ok = TRUE) |
156 | 6x |
checkmate::assert_class(y, "rule", null.ok = TRUE) |
157 | ||
158 | 6x |
if (is.null(x) && is.null(y)) { |
159 | 1x |
rlang::abort("Both rules are NULL.") |
160 |
} |
|
161 | ||
162 |
# If one of the rules is NULL, return the other (via empty list). |
|
163 | 5x |
x <- as.list(x) |
164 | 5x |
y <- as.list(y) |
165 | ||
166 | 5x |
x[names(y)] <- y |
167 | ||
168 | 5x |
r <- do.call(rule, x) |
169 | 5x |
r |
170 |
} |
|
171 | ||
172 |
#' Combine Rules Found in Lists of Rules. |
|
173 |
#' |
|
174 |
#' @param x (`list`) of `rule` objects. |
|
175 |
#' @param val (`list`) of `rule` objects. |
|
176 |
#' @param ... passed to [`dunlin::combine_rules`]. |
|
177 |
#' |
|
178 |
#' @returns a `list` of `rule` objects. |
|
179 |
#' @export |
|
180 |
#' @examples |
|
181 |
#' l1 <- list( |
|
182 |
#' r1 = rule( |
|
183 |
#' "first" = c("overwritten", "OVERWRITTEN"), |
|
184 |
#' "almost first" = c(NA, "almost") |
|
185 |
#' ), |
|
186 |
#' r2 = rule( |
|
187 |
#' ANYTHING = "anything" |
|
188 |
#' ) |
|
189 |
#' ) |
|
190 |
#' |
|
191 |
#' l2 <- list( |
|
192 |
#' r1 = rule( |
|
193 |
#' "first" = c("F", "f"), |
|
194 |
#' "second" = c("S", "s"), |
|
195 |
#' "third" = c("T", "t"), |
|
196 |
#' .to_NA = "something" |
|
197 |
#' ), |
|
198 |
#' r3 = rule( |
|
199 |
#' SOMETHING = "something" |
|
200 |
#' ) |
|
201 |
#' ) |
|
202 |
#' |
|
203 |
#' combine_list_rules(l1, l2) |
|
204 |
combine_list_rules <- function(x, val, ...) { |
|
205 |
# Unique names prevents zero-character names. |
|
206 | 2x |
checkmate::assert_list(x, types = "rule", null.ok = FALSE, names = "unique") |
207 | 1x |
checkmate::assert_list(val, types = "rule", null.ok = FALSE, names = "unique") |
208 | ||
209 | 1x |
vnames <- names(val) |
210 | ||
211 | 1x |
for (v in vnames) { |
212 | 2x |
x[[v]] <- combine_rules(x[[v]], val[[v]], ...) |
213 |
} |
|
214 | 1x |
x |
215 |
} |
1 |
#' Propagate Column |
|
2 |
#' |
|
3 |
#' `propagate`copy columns from a given table of a `list` of `data.frame` to all tables based on other |
|
4 |
#' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving |
|
5 |
#' tables. In safe mode, the key must be unique in the original table. |
|
6 |
#' |
|
7 |
#' @param db (`list` of `data.frame`) object for which some variable need to be propagated. |
|
8 |
#' @param from (`string`) the name of the table where the variables to propagate are stored. |
|
9 |
#' @param add (`character`) the names of the variables to propagate. |
|
10 |
#' @param by (`character`) the key binding the `from` table to the other tables. |
|
11 |
#' @param safe (`flag`) should the key be checked for uniqueness in the `from` table. |
|
12 |
#' |
|
13 |
#' @returns updated `list` of `data.frame`. |
|
14 |
#' |
|
15 |
#' @rdname propagate |
|
16 |
#' @export |
|
17 |
#' |
|
18 |
propagate <- function(db, from, add, by, safe = TRUE) { |
|
19 | 3x |
UseMethod("propagate") |
20 |
} |
|
21 | ||
22 |
#' @rdname propagate |
|
23 |
#' @export |
|
24 |
#' |
|
25 |
#' |
|
26 |
#' @examples |
|
27 |
#' df1 <- data.frame( |
|
28 |
#' id1 = c("a", "a", "c", "d", "e", "f"), |
|
29 |
#' id2 = c("A", "B", "A", "A", "A", "A"), |
|
30 |
#' int = c(1, 2, 3, 4, 5, 6), |
|
31 |
#' bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE) |
|
32 |
#' ) |
|
33 |
#' |
|
34 |
#' df2 <- data.frame( |
|
35 |
#' id1 = c("a", "a", "d", "e", "f", "g"), |
|
36 |
#' id2 = c("A", "B", "A", "A", "A", "A") |
|
37 |
#' ) |
|
38 |
#' |
|
39 |
#' df3 <- data.frame( |
|
40 |
#' id1 = c("a", "c", "d", "e", "f", "x"), |
|
41 |
#' id2 = c("A", "A", "A", "A", "B", "A"), |
|
42 |
#' int = c(11, 22, 33, 44, 55, 66) |
|
43 |
#' ) |
|
44 |
#' |
|
45 |
#' db <- list(df1 = df1, fd2 = df2, df3 = df3) |
|
46 |
#' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2")) |
|
47 |
#' |
|
48 |
propagate.list <- function(db, from, add, by, safe = TRUE) { |
|
49 | 3x |
checkmate::assert_list(db, types = "data.frame", names = "unique") |
50 | 3x |
checkmate::assert_names(names(db), must.include = from) |
51 | 3x |
checkmate::assert_names(colnames(db[[from]]), must.include = add) |
52 | 3x |
checkmate::assert_names(colnames(db[[from]]), must.include = by) |
53 | 3x |
checkmate::assert_flag(safe) |
54 | ||
55 | 3x |
if (safe) { |
56 | 2x |
keys <- db[[from]][, by] |
57 | 1x |
if (anyDuplicated(keys)) rlang::abort(paste("Duplicated key")) |
58 |
} |
|
59 | 2x |
toJoin <- db[[from]] |
60 | ||
61 | 2x |
for (tab_name in setdiff(names(db), from)) { |
62 | 3x |
tab_colnames <- colnames(db[[tab_name]]) |
63 | 3x |
if (!all(add %in% tab_colnames) && all(by %in% tab_colnames)) { |
64 | 2x |
missing_var <- setdiff(add, tab_colnames) |
65 | 2x |
sel_var <- c(missing_var, by) |
66 | 2x |
sel_tab <- toJoin[, sel_var] |
67 | ||
68 | 2x |
cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var))) |
69 | ||
70 | 2x |
db[[tab_name]] <- db[[tab_name]] %>% |
71 | 2x |
dplyr::left_join(sel_tab, by = by, multiple = "all") |
72 |
} else { |
|
73 | 1x |
cat(paste0("\nSkipping: ", tab_name)) |
74 |
} |
|
75 |
} |
|
76 | 2x |
cat("\n") |
77 | 2x |
return(db) |
78 |
} |
1 |
#' Encode Categorical Missing Values in a `list` of `data.frame` |
|
2 |
#' |
|
3 |
#' @details This is a helper function to encode missing values (i.e `NA` and `empty string`) of every `character` and |
|
4 |
#' `factor` variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved. |
|
5 |
#' |
|
6 |
#' @param data (`list` of `data.frame`) to be transformed. |
|
7 |
#' @param omit_tables (`character`) the names of the tables to omit from processing. |
|
8 |
#' @param omit_columns (`character`) the names of the columns to omit from processing. |
|
9 |
#' @param char_as_factor (`logical`) should character columns be converted into factor. |
|
10 |
#' @param na_level (`string`) the label to encode missing levels. |
|
11 |
#' @returns `list` of `data.frame` object with explicit missing levels. |
|
12 |
#' @export |
|
13 |
#' |
|
14 |
#' @examples |
|
15 |
#' df1 <- data.frame( |
|
16 |
#' "char" = c("a", "b", NA, "a", "k", "x"), |
|
17 |
#' "char2" = c("A", "B", NA, "A", "K", "X"), |
|
18 |
#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), |
|
19 |
#' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA) |
|
20 |
#' ) |
|
21 |
#' df2 <- data.frame( |
|
22 |
#' "char" = c("a", "b", NA, "a", "k", "x"), |
|
23 |
#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), |
|
24 |
#' "num" = c(1:5, NA) |
|
25 |
#' ) |
|
26 |
#' df3 <- data.frame( |
|
27 |
#' "char" = c(NA, NA, "A") |
|
28 |
#' ) |
|
29 |
#' |
|
30 |
#' db <- list(df1 = df1, df2 = df2, df3 = df3) |
|
31 |
#' |
|
32 |
#' ls_explicit_na(db) |
|
33 |
#' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2") |
|
34 |
#' |
|
35 |
ls_explicit_na <- function(data, |
|
36 |
omit_tables = NULL, |
|
37 |
omit_columns = NULL, |
|
38 |
char_as_factor = TRUE, |
|
39 |
na_level = "<Missing>") { |
|
40 | 3x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
41 | 3x |
checkmate::assert_character(omit_tables, null.ok = TRUE) |
42 | 3x |
checkmate::assert_character(omit_columns, null.ok = TRUE) |
43 | 3x |
checkmate::assert_flag(char_as_factor) |
44 | 3x |
checkmate::assert_string(na_level) |
45 | ||
46 | 3x |
modif_tab <- setdiff(names(data), omit_tables) |
47 | 3x |
if (length(modif_tab) < 1) { |
48 | 1x |
return(data) |
49 |
} |
|
50 | ||
51 | 2x |
data[modif_tab] <- lapply( |
52 | 2x |
data[modif_tab], |
53 | 2x |
h_df_explicit, |
54 | 2x |
omit_columns = omit_columns, |
55 | 2x |
char_as_factor = char_as_factor, |
56 | 2x |
na_level = na_level |
57 |
) |
|
58 | ||
59 | 2x |
data |
60 |
} |
|
61 | ||
62 |
#' Encode Categorical Missing Values in a `data.frame`. |
|
63 |
#' |
|
64 |
#' @inheritParams ls_explicit_na |
|
65 |
#' @returns a `data.frame` object with explicit missing levels. |
|
66 |
#' @keywords internal |
|
67 |
h_df_explicit <- function(df, |
|
68 |
omit_columns = NULL, |
|
69 |
char_as_factor = TRUE, |
|
70 |
na_level = "<Missing>") { |
|
71 | 3x |
na_list <- list(x = c("", NA)) |
72 | 3x |
names(na_list) <- na_level |
73 | 3x |
na_rule <- rule(.lst = na_list) |
74 | ||
75 | 3x |
df %>% |
76 | 3x |
mutate( |
77 | 3x |
across( |
78 | 3x |
where(~ is.character(.x) | is.factor(.x)) & !any_of(.env$omit_columns), |
79 | 3x |
~ reformat(.x, format = .env$na_rule, .string_as_fct = .env$char_as_factor, .na_last = TRUE) |
80 |
) |
|
81 |
) |
|
82 |
} |
1 |
#' Setting the Label Attribute |
|
2 |
#' |
|
3 |
#' @param var (`object`) whose label attribute can be set. |
|
4 |
#' @param label (`character`) the label to add. |
|
5 |
#' @returns `object` with label attribute. |
|
6 |
#' |
|
7 |
#' @export |
|
8 |
#' @examples |
|
9 |
#' x <- c(1:10) |
|
10 |
#' attr(x, "label") |
|
11 |
#' |
|
12 |
#' y <- attr_label(x, "my_label") |
|
13 |
#' attr(y, "label") |
|
14 |
attr_label <- function(var, label) { |
|
15 | 135x |
checkmate::assert_character(label) |
16 | ||
17 | 134x |
x <- var |
18 | 134x |
attr(x, "label") <- label |
19 | ||
20 | 134x |
x |
21 |
} |
|
22 | ||
23 |
#' Setting the Label Attribute to Data Frame Columns |
|
24 |
#' |
|
25 |
#' @param df (`data.frame`). |
|
26 |
#' @param label (`character`) the labels to add. |
|
27 |
#' @returns `data.frame` with label attributes. |
|
28 |
#' |
|
29 |
#' @export |
|
30 |
#' @examples |
|
31 |
#' res <- attr_label_df(mtcars, letters[1:11]) |
|
32 |
#' res |
|
33 |
#' lapply(res, attr, "label") |
|
34 |
attr_label_df <- function(df, label) { |
|
35 | 32x |
checkmate::assert_data_frame(df) |
36 | 32x |
checkmate::assert_character(label, len = ncol(df)) |
37 | ||
38 | 31x |
res <- mapply(attr_label, var = df, label = as.list(label), SIMPLIFY = FALSE) |
39 | 31x |
as.data.frame(res) |
40 |
} |
|
41 | ||
42 |
#' Getting Argument From System, Option or Default |
|
43 |
#' |
|
44 |
#' @param opt (`string`) the name of an option. |
|
45 |
#' @param sys (`string`) the name of an environment variable. |
|
46 |
#' @param default value to return if neither the environment variable nor the option are set. |
|
47 |
#' @param split (`string`) the pattern used to split the values obtained using environment variable. |
|
48 |
#' |
|
49 |
#' @returns if defined, the value of the option (`opt`), a `character` from the environment variable (`sys`) or the |
|
50 |
#' `default` in this order of priority. |
|
51 |
#' |
|
52 |
#' @export |
|
53 |
#' @examplesIf require("withr") |
|
54 |
#' get_arg("my.option", "MY_ARG", "default") |
|
55 |
#' withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default")) |
|
56 |
#' withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default")) |
|
57 |
get_arg <- function(opt = NULL, sys = NULL, default = NULL, split = ";") { |
|
58 | 9x |
checkmate::assert_string(sys, null.ok = TRUE) |
59 | 9x |
checkmate::assert_string(opt, null.ok = TRUE) |
60 | 9x |
checkmate::assert_string(split) |
61 | ||
62 | 9x |
if (!is.null(opt)) { |
63 | 8x |
val <- getOption(opt, default = "") |
64 | 8x |
if (!identical(val, "")) { |
65 | 2x |
return(val) |
66 |
} |
|
67 |
} |
|
68 | ||
69 | 7x |
if (!is.null(sys)) { |
70 | 6x |
val <- Sys.getenv(sys, unset = "") |
71 | 6x |
if (!identical(val, "")) { |
72 | ! |
val <- stringr::str_split_1(val, split) |
73 | ! |
return(val) |
74 |
} |
|
75 |
} |
|
76 | ||
77 | 7x |
return(default) |
78 |
} |
1 |
#' Filter Data with Log |
|
2 |
#' @param data (`data.frame`) input data to subset, or named (`list` of `data.frame`). |
|
3 |
#' @param condition (`call`) of subset condition. Must evaluate as logical. |
|
4 |
#' @param suffix (`string`) optional argument describing the filter. |
|
5 |
#' @param ... further arguments to be passed to or from other methods. |
|
6 |
#' @returns a `data.frame` or `list` of `data.frame` filtered for the provided conditions. |
|
7 |
#' @details |
|
8 |
#' `log_filter` will filter the data/named list of data according to the `condition`. |
|
9 |
#' All the variables in `condition` must exist in the data (as variables) or in the parent |
|
10 |
#' frame(e.g., in global environment). |
|
11 |
#' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all |
|
12 |
#' other datasets with `USUBJID`. |
|
13 |
#' @export |
|
14 |
log_filter <- function(data, condition, ...) { |
|
15 | 45x |
UseMethod("log_filter") |
16 |
} |
|
17 | ||
18 |
#' @rdname log_filter |
|
19 |
#' @export |
|
20 |
#' @examples |
|
21 |
#' data <- iris |
|
22 |
#' attr(data$Sepal.Length, "label") <- "cm" |
|
23 |
#' log_filter(data, Sepal.Length >= 7) |
|
24 |
#' |
|
25 |
log_filter.data.frame <- function(data, condition, suffix = NULL, ...) { |
|
26 | 25x |
checkmate::assert_string(suffix, null.ok = TRUE) |
27 | ||
28 | 25x |
condition <- match.call()$condition |
29 | 25x |
vars <- all.vars(condition) |
30 | 25x |
var_in_env <- vapply(vars, exists, envir = parent.frame(), inherits = TRUE, FUN.VALUE = TRUE) |
31 | 25x |
var_in_data <- vapply(vars, `%in%`, table = names(data), FUN.VALUE = TRUE) |
32 | 25x |
if (!all(var_in_env | var_in_data)) { |
33 | 1x |
rlang::abort(sprintf("Variable %s not found in data or environment.", toString(vars[!(var_in_data | var_in_env)]))) |
34 |
} |
|
35 | 24x |
res <- eval(bquote(dplyr::filter(data, .(condition)))) |
36 | 24x |
rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix)) |
37 | 24x |
rlbl <- paste0(deparse(condition), collapse = "") |
38 | 24x |
rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ") |
39 | 24x |
names(rows) <- rlbl |
40 | 24x |
attr(res, "rows") <- c(attr(data, "rows"), rows) |
41 | ||
42 | 24x |
res |
43 |
} |
|
44 | ||
45 |
#' @rdname log_filter |
|
46 |
#' @param table (`string`) table name. |
|
47 |
#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering. |
|
48 |
#' @param verbose (`flag`) whether to print a report about the filtering. |
|
49 |
#' @export |
|
50 |
#' @examples |
|
51 |
#' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0)) |
|
52 |
log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, ...) { |
|
53 | 20x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
54 | 20x |
assert_all_tablenames(data, table) |
55 | 19x |
checkmate::assert_names(colnames(data[[table]]), must.include = by) |
56 | 19x |
condition <- match.call()$condition |
57 | 19x |
data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix)))) |
58 | 19x |
if (identical(table, "adsl")) { |
59 | 8x |
for (k in setdiff(names(data), "adsl")) { |
60 | 8x |
if (all(by %in% names(data[[k]]))) { |
61 | 1x |
if (length(by) == 0) by <- intersect(names(data[[k]]), names(data$adsl)) |
62 | ||
63 | 8x |
ori_n <- nrow(data[[k]]) |
64 | 8x |
ori_att <- attr(data[[k]], "rows") |
65 | ||
66 | 8x |
data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = by) |
67 | ||
68 | 8x |
rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix)) |
69 | 8x |
names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "") |
70 | 8x |
attr(data[[k]], "rows") <- c(ori_att, rows) |
71 |
} |
|
72 |
} |
|
73 |
} |
|
74 | ||
75 | 19x |
if (verbose) { |
76 | 2x |
print_log(data, incl = FALSE, incl.adsl = TRUE) |
77 |
} |
|
78 | ||
79 | 19x |
return(data) |
80 |
} |
|
81 | ||
82 |
# Get Log ---- |
|
83 | ||
84 |
#' Get Log |
|
85 |
#' |
|
86 |
#' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`. |
|
87 |
#' @param incl (`flag`) should information about unfiltered `data.frame` be printed. |
|
88 |
#' @param incl.adsl (`flag`) should indication of filtering performed through `adsl` be printed. |
|
89 |
#' @returns `character` or `list of character` describing the filtering applied to `data`. |
|
90 |
#' |
|
91 |
#' @export |
|
92 |
get_log <- function(data, incl, incl.adsl) { |
|
93 | 43x |
UseMethod("get_log") |
94 |
} |
|
95 | ||
96 |
#' @rdname get_log |
|
97 |
#' @export |
|
98 |
#' @examples |
|
99 |
#' data <- log_filter(iris, Sepal.Length >= 7, "xx") |
|
100 |
#' data <- log_filter(data, Sepal.Length < 2) |
|
101 |
#' data <- log_filter(data, Sepal.Length >= 2, "yy") |
|
102 |
#' get_log(data) |
|
103 |
#' |
|
104 |
get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
105 | 29x |
checkmate::assert_flag(incl) |
106 | ||
107 | 29x |
att <- attr(data, "rows") |
108 | 29x |
if (!incl.adsl) { |
109 | 4x |
sel <- grepl("Filtered by adsl", names(att)) |
110 | 4x |
att <- att[!sel] |
111 |
} |
|
112 | ||
113 | 29x |
if (length(att) != 0L) { |
114 | 19x |
start_row <- lapply(att, "[[", "init") |
115 | 19x |
end_row <- lapply(att, "[[", "final") |
116 | 19x |
suffix <- lapply(att, "[[", "suffix") |
117 | 19x |
suffix <- vapply(suffix, function(x) ifelse(is.null(x), "", paste0(x, ": ")), character(1)) |
118 | 19x |
res <- paste0(suffix, names(att), " [", start_row, " --> ", end_row, " rows.]") |
119 | 10x |
} else if (incl) { |
120 | 6x |
paste0("No filtering [", nrow(data), " rows.]") |
121 |
} else { |
|
122 | 4x |
NULL |
123 |
} |
|
124 |
} |
|
125 | ||
126 | ||
127 |
#' @rdname get_log |
|
128 |
#' @export |
|
129 |
#' @examples |
|
130 |
#' data <- log_filter( |
|
131 |
#' list(iris1 = iris, iris2 = iris), |
|
132 |
#' Sepal.Length >= 7, |
|
133 |
#' "iris1", |
|
134 |
#' character(0), |
|
135 |
#' "Sep" |
|
136 |
#' ) |
|
137 |
#' get_log(data) |
|
138 |
#' |
|
139 |
get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
140 | 14x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
141 | 14x |
checkmate::assert_flag(incl) |
142 | ||
143 | 14x |
lapply(data, get_log, incl = incl, incl.adsl = incl.adsl) |
144 |
} |
|
145 | ||
146 |
# Print Log ---- |
|
147 | ||
148 |
#' Print Log |
|
149 |
#' |
|
150 |
#' @inheritParams get_log |
|
151 |
#' @returns `NULL`. Print a description of the filtering applied to `data`. |
|
152 |
#' @export |
|
153 |
#' |
|
154 |
print_log <- function(data, incl, incl.adsl) { |
|
155 | 12x |
UseMethod("print_log") |
156 |
} |
|
157 | ||
158 |
#' @rdname print_log |
|
159 |
#' @export |
|
160 |
#' @examples |
|
161 |
#' data <- log_filter(iris, Sepal.Length >= 7, "Sep") |
|
162 |
#' print_log(data) |
|
163 |
print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
164 | 3x |
checkmate::assert_flag(incl) |
165 | ||
166 | 3x |
cat("Filter Log:") |
167 | 3x |
cat(paste0("\n ", get_log(data, incl = incl, incl.adsl = incl.adsl))) |
168 | 3x |
cat("\n") |
169 | 3x |
invisible() |
170 |
} |
|
171 | ||
172 |
#' @rdname print_log |
|
173 |
#' @export |
|
174 |
#' @examples |
|
175 |
#' data <- log_filter( |
|
176 |
#' list( |
|
177 |
#' adsl = iris, |
|
178 |
#' iris2 = iris, |
|
179 |
#' mtcars = mtcars, |
|
180 |
#' iris3 = iris |
|
181 |
#' ), |
|
182 |
#' Sepal.Length >= 7, |
|
183 |
#' "adsl", |
|
184 |
#' character(0), |
|
185 |
#' "adsl filter" |
|
186 |
#' ) |
|
187 |
#' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter") |
|
188 |
#' print_log(data) |
|
189 |
#' print_log(data, incl = FALSE) |
|
190 |
#' print_log(data, incl.adsl = FALSE, incl = FALSE) |
|
191 |
print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
192 | 9x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
193 | 9x |
checkmate::assert_flag(incl) |
194 | ||
195 | 9x |
filter_log <- get_log(data, incl = incl, incl.adsl = incl.adsl) |
196 | ||
197 | 9x |
if (!incl) { |
198 | 4x |
filter_log <- filter_log[!vapply(filter_log, is.null, logical(1))] |
199 |
} |
|
200 | ||
201 | 9x |
cat("Filter Log:") |
202 | 9x |
if (length(filter_log) == 0) { |
203 | 1x |
cat("\n No filtering") |
204 |
} else { |
|
205 | 8x |
mapply( |
206 | 8x |
function(x, y) { |
207 | 13x |
cat(paste0("\n - ", x, ":")) |
208 | 13x |
cat(paste0("\n ", y, "")) |
209 |
}, |
|
210 | 8x |
as.list(names(filter_log)), |
211 | 8x |
filter_log |
212 |
) |
|
213 |
} |
|
214 | 9x |
cat("\n") |
215 | ||
216 | 9x |
invisible() |
217 |
} |
1 |
#' Unite Columns of a Table in a `list` of `data.frame`. |
|
2 |
#' |
|
3 |
#' @param adam_db (`list` of `data.frames`) to be transformed. |
|
4 |
#' @param tab (`string`) the name of a table in the `adam_db` object. |
|
5 |
#' @param cols (`character`) the name of the columns to unite. |
|
6 |
#' @param sep (`string`) the separator for the new column name. |
|
7 |
#' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used. |
|
8 |
#' |
|
9 |
#' @returns `list` of `data.frames` object with a united column. |
|
10 |
#' @export |
|
11 |
#' |
|
12 |
#' @examples |
|
13 |
#' db <- list(mtcars = mtcars, iris = iris) |
|
14 |
#' |
|
15 |
#' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION") |
|
16 |
#' x$mtcars |
|
17 |
ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) { |
|
18 | 4x |
checkmate::assert_list(adam_db, types = "data.frame") |
19 | 4x |
checkmate::assert_string(tab) |
20 | 4x |
checkmate::assert_names(names(adam_db), must.include = tab) |
21 | 4x |
checkmate::assert_character(cols, min.len = 1) |
22 | 4x |
checkmate::assert_names(names(adam_db[[tab]]), must.include = cols) |
23 | 4x |
checkmate::assert_string(sep) |
24 | 4x |
checkmate::assert_string(new, null.ok = TRUE) |
25 | ||
26 | 4x |
x_interaction <- if (!is.null(new)) { |
27 | 3x |
new |
28 |
} else { |
|
29 | 1x |
paste(cols, collapse = sep) |
30 |
} |
|
31 | ||
32 | 4x |
x_df <- adam_db[[tab]][, cols, drop = FALSE] |
33 | 4x |
lvl <- lapply(x_df, function(y) { |
34 | 9x |
uni <- if (is.factor(y)) { |
35 | 7x |
levels(y) |
36 |
} else { |
|
37 | 2x |
unique(y) |
38 |
} |
|
39 | 9x |
factor(uni, levels = uni) |
40 |
}) |
|
41 | ||
42 | 4x |
all_lvl_df <- expand.grid(lvl) |
43 | ||
44 | 4x |
all_lvl <- all_lvl_df[, cols, drop = FALSE] %>% |
45 | 4x |
arrange(across(all_of(cols))) %>% |
46 | 4x |
apply(1, paste, collapse = sep) |
47 | ||
48 | 4x |
x_vec <- x_df[, cols, drop = FALSE] %>% |
49 | 4x |
apply(1, paste, collapse = sep) |
50 | ||
51 | 4x |
existing_lvl <- intersect(all_lvl, x_vec) |
52 | 4x |
x_fact <- factor(x_vec, existing_lvl) |
53 | ||
54 | 4x |
adam_db[[tab]][, x_interaction] <- x_fact |
55 | 4x |
adam_db |
56 |
} |
1 |
#' Transforming data.frame with Multiple Identifying columns into Wide Format |
|
2 |
#' |
|
3 |
#' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting |
|
4 |
#' duplicated values, the function will throw an error if the same parameter is provided twice for the same |
|
5 |
#' observation. |
|
6 |
#' |
|
7 |
#' @param data (`data.frame`) to be pivoted. |
|
8 |
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations. |
|
9 |
#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The |
|
10 |
#' unique values in this column will become column names in the output. |
|
11 |
#' @param value_from (`character`) the name of the column containing the values that will populate the output. |
|
12 |
#' @param drop_na (`logical`) should column containing only `NAs` be dropped. |
|
13 |
#' @param drop_lvl (`logical`) should missing levels be dropped in the columns coming from (`value_from`). |
|
14 |
#' |
|
15 |
#' @returns `data.frame` in a wide format. |
|
16 |
#' |
|
17 |
#' @export |
|
18 |
#' @examples |
|
19 |
#' test_data <- data.frame( |
|
20 |
#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), |
|
21 |
#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), |
|
22 |
#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), |
|
23 |
#' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) |
|
24 |
#' ) |
|
25 |
#' |
|
26 |
#' multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val") |
|
27 |
#' multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val") |
|
28 |
multi_id_pivot_wider <- function(data, |
|
29 |
id, |
|
30 |
param_from, |
|
31 |
value_from, |
|
32 |
drop_na = FALSE, |
|
33 |
drop_lvl = FALSE) { |
|
34 |
# check for duplication of observation-parameter |
|
35 | 33x |
checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3) |
36 | 33x |
checkmate::assert_character(id) |
37 | 33x |
checkmate::assert_character(param_from, len = 1) |
38 | 33x |
checkmate::assert_character(value_from, len = 1) |
39 | 33x |
checkmate::assert_false(any(duplicated(data[, c(id, param_from)]))) |
40 | 32x |
checkmate::assert_subset(c(id, param_from, value_from), colnames(data)) |
41 | 32x |
checkmate::assert_flag(drop_na) |
42 | 32x |
checkmate::assert_flag(drop_lvl) |
43 | ||
44 |
# find a way to sort |
|
45 | 32x |
unique_id <- unique(data[id]) |
46 | 32x |
key <- apply(unique_id[id], 1, paste, collapse = "-") |
47 | 32x |
unique_id <- cbind(key, unique_id) |
48 | ||
49 | 32x |
param <- data[[param_from]] |
50 | ||
51 | 32x |
mini_data <- data[, c(param_from, value_from)] |
52 | 32x |
f_key <- apply(data[id], 1, paste, collapse = "-") |
53 | 32x |
mini_data <- cbind(f_key, mini_data) |
54 | ||
55 | 32x |
data_ls <- split(mini_data, param) |
56 | ||
57 |
# Transform to named vector, the first column is the key. |
|
58 | 32x |
data_vec <- |
59 | 32x |
lapply( |
60 | 32x |
data_ls, |
61 | 32x |
function(x) setNames(x[[value_from]], x[, 1]) |
62 |
) |
|
63 | ||
64 | 32x |
if (drop_lvl) { |
65 | 3x |
data_vec <- rapply(data_vec, droplevels, classes = "factor", how = "replace") |
66 |
} |
|
67 | ||
68 |
# query each id in each param |
|
69 | 32x |
all_vec <- lapply(data_vec, function(x) x[unique_id[, 1]]) |
70 | ||
71 | 26x |
if (drop_na) all_vec <- Filter(function(x) !all(is.na(x)), all_vec) |
72 | ||
73 | 32x |
all_vec <- lapply(all_vec, unname) |
74 | 32x |
bind_data <- do.call(dplyr::bind_cols, all_vec) |
75 | ||
76 | 32x |
res <- dplyr::bind_cols(unique_id[, -1, drop = FALSE], bind_data) |
77 | ||
78 | 32x |
rownames(res) <- NULL |
79 | 32x |
res |
80 |
} |
|
81 | ||
82 |
#' Transforming data.frame with multiple Data Column into Wide Format |
|
83 |
#' |
|
84 |
#' @details This function is adapted to cases where the data are distributed in several columns while the name of the |
|
85 |
#' parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are |
|
86 |
#' in `AVALC`. |
|
87 |
#' |
|
88 |
#' @param data (`data.frame`) to be pivoted. |
|
89 |
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations. |
|
90 |
#' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The |
|
91 |
#' unique values in this column will become column names in the output. |
|
92 |
#' @param value_from (`character`) the name of the column containing the values that will populate the output. |
|
93 |
#' @param labels_from (`character`) the name of the column congaing the labels of the new columns. from. If not |
|
94 |
#' provided, the labels will be equal to the column names. When several labels are available for the same column, the |
|
95 |
#' first one will be selected. |
|
96 |
#' @param drop_na (`logical`) should column containing only `NAs` be dropped. |
|
97 |
#' @param drop_lvl (`logical`) should missing levels be dropped in the columns coming from `value_from`. |
|
98 |
#' |
|
99 |
#' @returns `list` of `data.frame` in a wide format with label attribute attached to each columns. |
|
100 |
#' |
|
101 |
#' @export |
|
102 |
#' @examples |
|
103 |
#' test_data <- data.frame( |
|
104 |
#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), |
|
105 |
#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), |
|
106 |
#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), |
|
107 |
#' the_label = c( |
|
108 |
#' "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)", |
|
109 |
#' "Gender", "Height (cm)", "Height (cm)", "Pre-condition" |
|
110 |
#' ), |
|
111 |
#' the_val = c(65, 165, NA, 66, NA, 166, 155, NA), |
|
112 |
#' the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE) |
|
113 |
#' ) |
|
114 |
#' |
|
115 |
#' x <- poly_pivot_wider( |
|
116 |
#' test_data, |
|
117 |
#' c("the_obs", "the_obs2"), |
|
118 |
#' "the_param", |
|
119 |
#' c("the_val", "the_val2"), |
|
120 |
#' "the_label" |
|
121 |
#' ) |
|
122 |
#' x |
|
123 |
#' Reduce(function(u, v) merge(u, v, all = TRUE), x) |
|
124 |
poly_pivot_wider <- function(data, |
|
125 |
id, |
|
126 |
param_from, |
|
127 |
value_from, |
|
128 |
labels_from = NULL, |
|
129 |
drop_na = TRUE, |
|
130 |
drop_lvl = FALSE) { |
|
131 |
# other tests are performed at lower levels. |
|
132 | 13x |
checkmate::assert_character(value_from, unique = TRUE) |
133 | ||
134 |
# Create new labels for new columns. |
|
135 | 13x |
if (is.null(labels_from) || labels_from == param_from) { |
136 | 1x |
new_labels <- unique(data[[param_from]]) |
137 | 1x |
names(new_labels) <- new_labels |
138 |
} else { |
|
139 | 12x |
checkmate::assert_character(labels_from, len = 1) |
140 | 12x |
checkmate::assert_subset(labels_from, colnames(data)) |
141 | ||
142 | 12x |
new_labels_df <- data[, c(labels_from, param_from)] |
143 | 12x |
new_labels_df <- unique(new_labels_df) |
144 | ||
145 | 12x |
new_labels <- as.character(new_labels_df[[labels_from]]) |
146 | 12x |
names(new_labels) <- as.character(new_labels_df[[param_from]]) |
147 |
} |
|
148 | ||
149 |
# Retrieve old labels. |
|
150 | 13x |
old_labels <- lapply(data, attr, "label") |
151 | 13x |
n_old_label <- names(old_labels) |
152 | 13x |
null_label <- unlist(lapply(old_labels, is.null)) |
153 | 13x |
old_labels[null_label] <- n_old_label[null_label] |
154 | 13x |
old_labels <- unlist(old_labels) |
155 | ||
156 | 13x |
all_labels <- c(new_labels, old_labels) |
157 | ||
158 | 13x |
res_ls <- list() |
159 | 13x |
for (n_value_from in value_from) { |
160 | 26x |
res <- multi_id_pivot_wider( |
161 | 26x |
data = data, |
162 | 26x |
id = id, |
163 | 26x |
param_from = param_from, |
164 | 26x |
value_from = n_value_from, |
165 | 26x |
drop_na = drop_na, |
166 | 26x |
drop_lvl = drop_lvl |
167 |
) |
|
168 | ||
169 | 26x |
res <- attr_label_df(res, all_labels[colnames(res)]) |
170 | 26x |
res_ls[[n_value_from]] <- res |
171 |
} |
|
172 | 13x |
res_ls |
173 |
} |
1 |
#' Reorder Two Columns Levels Simultaneously |
|
2 |
#' |
|
3 |
#' @details The function expect a 1:1 matching between the elements of the two selected column. |
|
4 |
#' |
|
5 |
#' @param df (`data.frame`) with two column whose factors should be reordered. |
|
6 |
#' @param primary (`string`) the name of the column on which the levels reordering should be based. |
|
7 |
#' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the |
|
8 |
#' primary column. |
|
9 |
#' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be |
|
10 |
#' placed afterward in their current order. |
|
11 |
#' @returns a `data.frame` with the `secondary` column converted to factor with reordered levels. |
|
12 |
#' |
|
13 |
#' @export |
|
14 |
#' |
|
15 |
#' @examples |
|
16 |
#' df <- data.frame( |
|
17 |
#' SUBJID = 1:3, |
|
18 |
#' PARAMCD = factor(c("A", "B", "C")), |
|
19 |
#' PARAM = factor(paste("letter", LETTERS[1:3])) |
|
20 |
#' ) |
|
21 |
#' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B")) |
|
22 |
co_relevels <- function(df, primary, secondary, levels_primary) { |
|
23 | 6x |
checkmate::assert_data_frame(df, min.rows = 1) |
24 | 6x |
checkmate::assert_subset(c(primary, secondary), colnames(df)) |
25 | 6x |
checkmate::assert_character(levels_primary, min.len = 1) |
26 | 6x |
checkmate::assert_vector(df[[primary]], any.missing = FALSE) |
27 | 5x |
checkmate::assert_vector(df[[secondary]], any.missing = FALSE) |
28 | ||
29 | 5x |
df[, primary] <- as.factor(df[[primary]]) |
30 | 5x |
df[, secondary] <- as.factor(df[[secondary]]) |
31 | ||
32 |
# check unique relationship |
|
33 | 5x |
df_key <- df[, c(primary, secondary)] |
34 | 5x |
df_key <- unique(df_key) |
35 | ||
36 | 5x |
if (any(duplicated(df_key[[primary]])) || any(duplicated(df_key[[secondary]]))) { |
37 | 1x |
rlang::abort("non univoque relation between values in primary and secondary column") |
38 |
} |
|
39 | ||
40 | 4x |
keys <- setNames(as.character(df_key[[secondary]]), as.character(df_key[[primary]])) |
41 | ||
42 | 4x |
all_levels_primary <- c(levels_primary, setdiff(levels(df[[primary]]), levels_primary)) |
43 | 4x |
all_levels_secondary <- keys[all_levels_primary] |
44 | ||
45 | 4x |
df[, primary] <- factor(df[[primary]], all_levels_primary) |
46 | 4x |
df[, secondary] <- factor(df[[secondary]], all_levels_secondary) |
47 | ||
48 | 4x |
df |
49 |
} |
1 |
#' Join `adsub` to `adsl` |
|
2 |
#' |
|
3 |
#' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table. |
|
4 |
#' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row. |
|
5 |
#' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which |
|
6 |
#' columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`, |
|
7 |
#' none are selected. |
|
8 |
#' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which |
|
9 |
#' columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`, |
|
10 |
#' none are selected. |
|
11 |
#' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values. |
|
12 |
#' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical |
|
13 |
#' values. |
|
14 |
#' @param drop_na (`logical`) whether resulting columns containing only `NAs` should be dropped. |
|
15 |
#' @param drop_lvl (`logical`) should missing levels be dropped in the resulting columns. |
|
16 |
#' |
|
17 |
#' @returns a `list` of `data.frame` with new columns in the `adsl` table. |
|
18 |
#' |
|
19 |
#' @rdname join_adsub_adsl |
|
20 |
#' @export |
|
21 |
#' |
|
22 |
join_adsub_adsl <- function(adam_db, |
|
23 |
keys, |
|
24 |
continuous_var, |
|
25 |
categorical_var, |
|
26 |
continuous_suffix, |
|
27 |
categorical_suffix, |
|
28 |
drop_na = TRUE, |
|
29 |
drop_lvl = TRUE) { |
|
30 | 11x |
UseMethod("join_adsub_adsl") |
31 |
} |
|
32 | ||
33 |
#' @rdname join_adsub_adsl |
|
34 |
#' @export |
|
35 |
#' |
|
36 |
#' @examples |
|
37 |
#' adsl <- data.frame( |
|
38 |
#' USUBJID = c("S1", "S2", "S3", "S4"), |
|
39 |
#' STUDYID = "My_study", |
|
40 |
#' AGE = c(60, 44, 23, 31) |
|
41 |
#' ) |
|
42 |
#' |
|
43 |
#' adsub <- data.frame( |
|
44 |
#' USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"), |
|
45 |
#' STUDYID = "My_study", |
|
46 |
#' PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"), |
|
47 |
#' PARAMCD = c("w", "w", "w", "w", "h", "h", "h"), |
|
48 |
#' AVAL = c(98, 75, 70, 71, 182, 155, 152), |
|
49 |
#' AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180") |
|
50 |
#' ) |
|
51 |
#' |
|
52 |
#' db <- list(adsl = adsl, adsub = adsub) |
|
53 |
#' |
|
54 |
#' x <- join_adsub_adsl(adam_db = db) |
|
55 |
#' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h") |
|
56 |
join_adsub_adsl.list <- function(adam_db, |
|
57 |
keys = c("USUBJID", "STUDYID"), |
|
58 |
continuous_var = "all", |
|
59 |
categorical_var = "all", |
|
60 |
continuous_suffix = "", |
|
61 |
categorical_suffix = "_CAT", |
|
62 |
drop_na = TRUE, |
|
63 |
drop_lvl = FALSE) { |
|
64 | 11x |
checkmate::assert_list(adam_db, types = "data.frame") |
65 | 11x |
checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub")) |
66 | 11x |
checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys)) |
67 | 11x |
checkmate::assert_names(names(adam_db$adsl), must.include = keys) |
68 | 11x |
checkmate::assert_numeric(adam_db$adsub$AVAL) |
69 | 11x |
checkmate::assert_multi_class(adam_db$adsub$AVALC, c("character", "factor")) |
70 | 11x |
checkmate::assert_string(continuous_suffix) |
71 | 11x |
checkmate::assert_string(categorical_suffix) |
72 | 11x |
checkmate::assert_flag(drop_na) |
73 | 11x |
checkmate::assert_flag(drop_lvl) |
74 | ||
75 |
# Empty strings in AVALC are treated as NA. |
|
76 | 11x |
adam_db$adsub$AVALC[adam_db$adsub$AVALC == ""] <- NA |
77 | ||
78 | 11x |
value_col <- c("AVAL", "AVALC") |
79 | 11x |
vars_ls <- list(continuous_var, categorical_var) |
80 | 11x |
suffix_ls <- list(continuous_suffix, categorical_suffix) |
81 | ||
82 |
# Select variables names. |
|
83 | 11x |
vars_ls <- lapply(vars_ls, function(x) { |
84 | 22x |
if (identical(x, "all")) { |
85 | 18x |
unique(adam_db$adsub$PARAMCD) |
86 |
} else { |
|
87 | 4x |
x |
88 |
} |
|
89 |
}) |
|
90 | ||
91 |
# Create new variable names. |
|
92 | 11x |
vars_nam <- mapply( |
93 | 11x |
function(x, y) { |
94 | 22x |
if (!is.null(x)) { |
95 | 18x |
names(x) <- paste0(x, y) |
96 | 18x |
x |
97 |
} else { |
|
98 | 4x |
NULL |
99 |
} |
|
100 |
}, |
|
101 | 11x |
vars_ls, |
102 | 11x |
suffix_ls, |
103 | 11x |
SIMPLIFY = FALSE |
104 |
) |
|
105 | ||
106 |
# Test if new columns already exist in adsl. |
|
107 | 11x |
assert_names_notadsl(vars_nam, adam_db$adsl) |
108 | ||
109 |
# Test if categorical and continuous column will result in the same column name. |
|
110 | 11x |
assert_names_collision(vars_nam) |
111 | ||
112 |
# Pivot and keep labels. |
|
113 | 11x |
adsub_wide_ls <- |
114 | 11x |
adam_db$adsub %>% |
115 | 11x |
poly_pivot_wider( |
116 | 11x |
id = keys, |
117 | 11x |
param_from = "PARAMCD", |
118 | 11x |
value_from = value_col, |
119 | 11x |
labels_from = "PARAM", |
120 | 11x |
drop_na = drop_na, |
121 | 11x |
drop_lvl = drop_lvl |
122 |
) |
|
123 | ||
124 |
# Merge categorical and continuous variables. |
|
125 | 11x |
for (i in seq_along(value_col)) { |
126 | 22x |
adsub_df <- adsub_wide_ls[[value_col[i]]] |
127 | ||
128 |
# Warning if some columns are entirely NA, hence discarded. |
|
129 | 22x |
not_cols <- setdiff(vars_nam[[i]], colnames(adsub_df)) |
130 | 22x |
if (length(not_cols) > 0) { |
131 | 2x |
type <- ifelse(value_col[i] == "AVALC", "Categorical", "Continuous") |
132 | 2x |
arg_type <- ifelse(value_col[i] == "AVALC", "categorical_var", "continuous_var") |
133 | 2x |
warning( |
134 | 2x |
sprintf( |
135 | 2x |
"Dropping %s for %s type, No data available. Adjust `%s` argument to silence this warning or set `drop_na = FALSE`", # nolint |
136 | 2x |
toString(not_cols), |
137 | 2x |
type, |
138 | 2x |
arg_type |
139 |
) |
|
140 |
) |
|
141 |
} |
|
142 | ||
143 |
# Preserving names. |
|
144 | 22x |
common_cols_id <- c(vars_nam[[i]]) %in% colnames(adsub_df) |
145 | 22x |
common_cols <- vars_nam[[i]][common_cols_id] |
146 | ||
147 | 22x |
adsub_df <- adsub_df[, c(keys, as.character(common_cols)), drop = FALSE] |
148 | 22x |
colnames(adsub_df) <- c(keys, names(common_cols)) |
149 | ||
150 | 22x |
adam_db$adsl <- dplyr::left_join( |
151 | 22x |
x = adam_db$adsl, |
152 | 22x |
y = adsub_df, |
153 | 22x |
by = keys |
154 |
) |
|
155 |
} |
|
156 | ||
157 | 11x |
adam_db |
158 |
} |
|
159 | ||
160 |
# Utility functions ---- |
|
161 | ||
162 |
assert_names_collision <- function(vars_nam) { |
|
163 | 11x |
final_names_ls <- lapply(vars_nam, names) |
164 | 11x |
in_both <- final_names_ls[[1]] %in% final_names_ls[[2]] |
165 | 11x |
if (any(in_both)) { |
166 | ! |
rlang::warn( |
167 | ! |
paste( |
168 | ! |
toString(final_names_ls[[1]][in_both]), |
169 | ! |
"are new columns for continuous and categorical variable, |
170 | ! |
Please set different `continuous_suffix` or `categorical_suffix` |
171 | ! |
or select different columns to avoid automatic renaming." |
172 |
) |
|
173 |
) |
|
174 |
} |
|
175 |
} |
|
176 | ||
177 |
assert_names_notadsl <- function(vars_nam, df) { |
|
178 | 11x |
final_names <- unique(sapply(vars_nam, names)) |
179 | 11x |
already_in_adsl <- final_names %in% colnames(df) |
180 | 11x |
if (any(already_in_adsl)) { |
181 | 1x |
rlang::warn( |
182 | 1x |
paste( |
183 | 1x |
toString(final_names[already_in_adsl]), |
184 | 1x |
"already exist in adsl, the name will default to another values. |
185 | 1x |
Please change `continuous_suffix` or `categorical_suffix` to avoid automatic renaming" |
186 |
) |
|
187 |
) |
|
188 |
} |
|
189 |
} |
1 |
#' Cutting data by group |
|
2 |
#' |
|
3 |
#' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are |
|
4 |
#' closed on the right (and open on the left). |
|
5 |
#' |
|
6 |
#' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation. |
|
7 |
#' @param col_data (`character`) the column containing the data to be cut. |
|
8 |
#' @param col_group (`character`) the column containing the names of the groups according to which the data should be |
|
9 |
#' split. |
|
10 |
#' @param group (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the |
|
11 |
#' name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically |
|
12 |
#' `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`). |
|
13 |
#' @param cat_col (`character`) the name of the new column in which the cut label should he stored. |
|
14 |
#' @returns `data.frame` with a column containing categorical values. |
|
15 |
#' @export |
|
16 |
#' |
|
17 |
#' @examples |
|
18 |
#' group <- list( |
|
19 |
#' list( |
|
20 |
#' "Height", |
|
21 |
#' c(-Inf, 150, 170, Inf), |
|
22 |
#' c("=<150", "150-170", ">170") |
|
23 |
#' ), |
|
24 |
#' list( |
|
25 |
#' "Weight", |
|
26 |
#' c(-Inf, 65, Inf), |
|
27 |
#' c("=<65", ">65") |
|
28 |
#' ), |
|
29 |
#' list( |
|
30 |
#' "Age", |
|
31 |
#' c(-Inf, 31, Inf), |
|
32 |
#' c("=<31", ">31") |
|
33 |
#' ), |
|
34 |
#' list( |
|
35 |
#' "PreCondition", |
|
36 |
#' c(-Inf, 1, Inf), |
|
37 |
#' c("=<1", "<1") |
|
38 |
#' ) |
|
39 |
#' ) |
|
40 |
#' data <- data.frame( |
|
41 |
#' SUBJECT = rep(letters[1:10], 4), |
|
42 |
#' PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10), |
|
43 |
#' AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)), |
|
44 |
#' index = 1:40 |
|
45 |
#' ) |
|
46 |
#' |
|
47 |
#' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories") |
|
48 |
cut_by_group <- function(df, |
|
49 |
col_data, |
|
50 |
col_group, |
|
51 |
group, |
|
52 |
cat_col) { |
|
53 | 4x |
checkmate::assert_data_frame(df) |
54 | 4x |
checkmate::assert_subset(c(col_data, col_group), colnames(df)) |
55 | 4x |
checkmate::assert_numeric(df[, col_data]) |
56 | 4x |
checkmate::assert_list(group) |
57 | ||
58 | 4x |
lapply( |
59 | 4x |
group, |
60 | 4x |
function(list_element) { |
61 | 11x |
checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character")) |
62 |
} |
|
63 |
) |
|
64 | ||
65 | 4x |
df[cat_col] <- NA |
66 | ||
67 | 4x |
for (g in group) { |
68 | 10x |
selected_row <- df[[col_group]] == g[[1]] |
69 | ||
70 | 10x |
df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]])) |
71 |
} |
|
72 | 3x |
df |
73 |
} |
1 |
whisker_env <- NULL |
|
2 | ||
3 |
.onLoad <- function(libname, pkgname) { |
|
4 | ! |
default_whiskers <- c(patient_label = "patients") |
5 | ! |
whisker_env <<- new.env(parent = globalenv()) |
6 | ! |
add_whisker(default_whiskers) |
7 |
} |