1 |
#' Transforming data.frame into Wide Format |
|
2 |
#' |
|
3 |
#' @details instead of nesting duplicated values, the function will throw an error if the same parameter is |
|
4 |
#' provided twice for the same observation. |
|
5 |
#' |
|
6 |
#' @param data (`data.frame`) to be pivoted. |
|
7 |
#' @param id (`character`) the name of the column identifying the observations. It will correspond to the row names |
|
8 |
#' of the output. |
|
9 |
#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The |
|
10 |
#' unique values in this column will become column names in the output. |
|
11 |
#' @param value_from (`character`) the name of the column containing the values that will populate the output. |
|
12 |
#' |
|
13 |
#' @return `data.frame` in a wide format. |
|
14 |
#' |
|
15 |
#' @export |
|
16 |
#' @examples |
|
17 |
#' test_data <- data.frame( |
|
18 |
#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), |
|
19 |
#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), |
|
20 |
#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), |
|
21 |
#' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) |
|
22 |
#' ) |
|
23 |
#' |
|
24 |
#' mini_pivot_wider(test_data, "the_obs", "the_param", "the_val") |
|
25 |
mini_pivot_wider <- function(data, |
|
26 |
id, |
|
27 |
param_from, |
|
28 |
value_from) { |
|
29 |
# check for duplication of observation-parameter |
|
30 | 3x |
checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3) |
31 | 3x |
checkmate::assert_character(id, len = 1) |
32 | 3x |
checkmate::assert_character(param_from, len = 1) |
33 | 1x |
checkmate::assert_character(value_from, len = 1) |
34 | 1x |
checkmate::assert_subset(c(id, param_from, value_from), colnames(data)) |
35 | 1x |
checkmate::assert_false(any(duplicated(data[, c(id, param_from)]))) |
36 | ||
37 | 1x |
unique_id <- sort(unique(data[[id]])) |
38 | 1x |
param <- data[[param_from]] |
39 | ||
40 | 1x |
mini_data <- data[, c(id, param_from, value_from)] |
41 | 1x |
data_ls <- split(mini_data, param) |
42 | ||
43 |
# transform to named vector |
|
44 | 1x |
data_vec <- |
45 | 1x |
lapply( |
46 | 1x |
data_ls, |
47 | 1x |
function(x) setNames(x[[value_from]], x[[id]]) |
48 |
) |
|
49 | ||
50 |
# query each id in each param |
|
51 | 1x |
all_vec <- lapply(data_vec, function(x) setNames(x[unique_id], unique_id)) |
52 | ||
53 | 1x |
bind_data <- as.data.frame(all_vec) |
54 | ||
55 | 1x |
res <- cbind(id = unique_id, bind_data) |
56 | 1x |
rownames(res) <- NULL |
57 | ||
58 | 1x |
res |
59 |
} |
|
60 | ||
61 |
#' Transforming data.frame with Complex Identifiers into Wide Format |
|
62 |
#' |
|
63 |
#' @details This function allows to identify observations on the basis of several columns. Warning: Instead of nesting |
|
64 |
#' duplicated values, the function will throw an error if the same parameter is provided twice for the same |
|
65 |
#' observation. |
|
66 |
#' |
|
67 |
#' @param data (`data.frame`) to be pivoted. |
|
68 |
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations. |
|
69 |
#' @param param_from (`character`) the name of the column containing the names of the parameters to be pivoted. The |
|
70 |
#' unique values in this column will become column names in the output. |
|
71 |
#' @param value_from (`character`) the name of the column containing the values that will populate the output. |
|
72 |
#' @param drop_na (`logical`) should column containing only `NAs` be dropped. |
|
73 |
#' |
|
74 |
#' @return `data.frame` in a wide format. |
|
75 |
#' |
|
76 |
#' @export |
|
77 |
#' @examples |
|
78 |
#' test_data <- data.frame( |
|
79 |
#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), |
|
80 |
#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), |
|
81 |
#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), |
|
82 |
#' the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) |
|
83 |
#' ) |
|
84 |
#' |
|
85 |
#' multi_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val") |
|
86 |
#' multi_pivot_wider(test_data, "the_obs2", "the_param", "the_val") |
|
87 |
multi_pivot_wider <- function(data, |
|
88 |
id, |
|
89 |
param_from, |
|
90 |
value_from, |
|
91 |
drop_na = FALSE) { |
|
92 |
# check for duplication of observation-parameter |
|
93 | 19x |
checkmate::assert_data_frame(data, min.rows = 1, min.cols = 3) |
94 | 19x |
checkmate::assert_character(id) |
95 | 19x |
checkmate::assert_character(param_from, len = 1) |
96 | 19x |
checkmate::assert_character(value_from, len = 1) |
97 | 19x |
checkmate::assert_false(any(duplicated(data[, c(id, param_from)]))) |
98 | 18x |
checkmate::assert_subset(c(id, param_from, value_from), colnames(data)) |
99 | ||
100 |
# find a way to sort |
|
101 | 18x |
unique_id <- unique(data[id]) |
102 | 18x |
key <- apply(unique_id[id], 1, paste, collapse = "-") |
103 | 18x |
unique_id <- cbind(key, unique_id) |
104 | ||
105 | 18x |
param <- data[[param_from]] |
106 | ||
107 | 18x |
mini_data <- data[, c(param_from, value_from)] |
108 | 18x |
f_key <- apply(data[id], 1, paste, collapse = "-") |
109 | 18x |
mini_data <- cbind(f_key, mini_data) |
110 | ||
111 | 18x |
data_ls <- split(mini_data, param) |
112 | ||
113 |
# Transform to named vector, the first column is the key. |
|
114 | 18x |
data_vec <- |
115 | 18x |
lapply( |
116 | 18x |
data_ls, |
117 | 18x |
function(x) setNames(x[[value_from]], x[, 1]) |
118 |
) |
|
119 | ||
120 |
# query each id in each param |
|
121 | 18x |
all_vec <- lapply(data_vec, function(x) x[unique_id[, 1]]) |
122 | ||
123 | 16x |
if (drop_na) all_vec <- Filter(function(x) !all(is.na(x)), all_vec) |
124 | ||
125 | 18x |
bind_data <- do.call(cbind, all_vec) |
126 | ||
127 | 18x |
res <- cbind(unique_id[, -1, drop = FALSE], bind_data) |
128 | ||
129 | 18x |
rownames(res) <- NULL |
130 | 18x |
res |
131 |
} |
|
132 | ||
133 |
#' Transforming data.frame with multiple Data Column into Wide Format |
|
134 |
#' |
|
135 |
#' @details This function is adapted to cases where the data are distributed in several columns while the name of the |
|
136 |
#' parameter is in one. Typical example is `adsub` where numeric data are stored in `AVAL` while categorical data are |
|
137 |
#' in `AVALC`. |
|
138 |
#' |
|
139 |
#' @param data (`data.frame`) to be pivoted. |
|
140 |
#' @param id (`character`) the name of the columns whose combination uniquely identify the observations. |
|
141 |
#' @param param_from (`character`) the name of the columns containing the names of the parameters to be pivoted. The |
|
142 |
#' unique values in this column will become column names in the output. |
|
143 |
#' @param value_from (`character`) the name of the column containing the values that will populate the output. |
|
144 |
#' @param labels_from (`character`) the name of the column congaing the labels of the new columns. from. If not |
|
145 |
#' provided, the labels will be equal to the column names. When several labels are available for the same column, the |
|
146 |
#' first one will be selected. |
|
147 |
#' |
|
148 |
#' @return `list` of `data.frame` in a wide format with label attribute attached to each columns. |
|
149 |
#' |
|
150 |
#' @export |
|
151 |
#' @examples |
|
152 |
#' test_data <- data.frame( |
|
153 |
#' the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), |
|
154 |
#' the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), |
|
155 |
#' the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), |
|
156 |
#' the_label = c( |
|
157 |
#' "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)", |
|
158 |
#' "Gender", "Height (cm)", "Height (cm)", "Pre-condition" |
|
159 |
#' ), |
|
160 |
#' the_val = c(65, 165, NA, 66, NA, 166, 155, NA), |
|
161 |
#' the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE) |
|
162 |
#' ) |
|
163 |
#' |
|
164 |
#' x <- poly_pivot_wider( |
|
165 |
#' test_data, |
|
166 |
#' c("the_obs", "the_obs2"), |
|
167 |
#' "the_param", |
|
168 |
#' c("the_val", "the_val2"), |
|
169 |
#' "the_label" |
|
170 |
#' ) |
|
171 |
#' x |
|
172 |
#' Reduce(function(u, v) merge(u, v, all = TRUE), x) |
|
173 |
poly_pivot_wider <- function(data, |
|
174 |
id, |
|
175 |
param_from, |
|
176 |
value_from, |
|
177 |
labels_from = NULL) { |
|
178 |
# other tests are performed at lower levels. |
|
179 | 7x |
checkmate::assert_character(value_from, unique = TRUE) |
180 | ||
181 |
# Create new labels for new columns. |
|
182 | 7x |
if (is.null(labels_from) || labels_from == param_from) { |
183 | 1x |
new_labels <- unique(data[[param_from]]) |
184 | 1x |
names(new_labels) <- new_labels |
185 |
} else { |
|
186 | 6x |
checkmate::assert_character(labels_from, len = 1) |
187 | 6x |
checkmate::assert_subset(labels_from, colnames(data)) |
188 | ||
189 | 6x |
new_labels_df <- data[, c(labels_from, param_from)] |
190 | 6x |
new_labels_df <- unique(new_labels_df) |
191 | ||
192 | 6x |
new_labels <- as.character(new_labels_df[[labels_from]]) |
193 | 6x |
names(new_labels) <- as.character(new_labels_df[[param_from]]) |
194 |
} |
|
195 | ||
196 |
# Retrieve old labels. |
|
197 | 7x |
old_labels <- lapply(data, attr, "label") |
198 | 7x |
n_old_label <- names(old_labels) |
199 | 7x |
null_label <- unlist(lapply(old_labels, is.null)) |
200 | 7x |
old_labels[null_label] <- n_old_label[null_label] |
201 | 7x |
old_labels <- unlist(old_labels) |
202 | ||
203 | 7x |
all_labels <- c(new_labels, old_labels) |
204 | ||
205 | 7x |
res_ls <- list() |
206 | 7x |
for (n_value_from in value_from) { |
207 | 14x |
res <- multi_pivot_wider( |
208 | 14x |
data = data, |
209 | 14x |
id = id, |
210 | 14x |
param_from = param_from, |
211 | 14x |
value_from = n_value_from, |
212 | 14x |
drop_na = TRUE |
213 |
) |
|
214 | ||
215 | 14x |
res <- attr_label_df(res, all_labels[colnames(res)]) |
216 | 14x |
res_ls[[n_value_from]] <- res |
217 |
} |
|
218 | 7x |
res_ls |
219 |
} |
1 |
#' Join `adsub` to `adsl` |
|
2 |
#' |
|
3 |
#' @param adam_db (`list` of `data.frame`) object input with an `adsl` and `adsub` table. |
|
4 |
#' @param keys (`character`) the name of the columns in `adsl` uniquely identifying a row. |
|
5 |
#' @param continuous_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which |
|
6 |
#' columns containing continuous values should be created. If `"all"`, all parameter values are selected, if `NULL`, |
|
7 |
#' none are selected. |
|
8 |
#' @param categorical_var (`character`) the value of a parameter in the `PARAMCD` column of the `adsub` table from which |
|
9 |
#' columns containing categorical values should be created. If `"all"`, all parameter values are selected, if `NULL`, |
|
10 |
#' none are selected. |
|
11 |
#' @param continuous_suffix (`string`) the suffixes to add to the newly generated columns containing continuous values. |
|
12 |
#' @param categorical_suffix (`string`) the suffixes to add to the newly generated columns containing categorical |
|
13 |
#' values. |
|
14 |
#' |
|
15 |
#' @return a `list` of `data.frame` with new columns in the `adsl` table. |
|
16 |
#' |
|
17 |
#' @rdname join_adsub_adsl |
|
18 |
#' @export |
|
19 |
#' |
|
20 |
join_adsub_adsl <- function(adam_db, |
|
21 |
keys, |
|
22 |
continuous_var, |
|
23 |
categorical_var, |
|
24 |
continuous_suffix, |
|
25 |
categorical_suffix) { |
|
26 | 5x |
UseMethod("join_adsub_adsl") |
27 |
} |
|
28 | ||
29 |
#' @rdname join_adsub_adsl |
|
30 |
#' @export |
|
31 |
#' |
|
32 |
#' @examples |
|
33 |
#' adsl <- data.frame( |
|
34 |
#' USUBJID = c("S1", "S2", "S3", "S4"), |
|
35 |
#' STUDYID = "My_study", |
|
36 |
#' AGE = c(60, 44, 23, 31) |
|
37 |
#' ) |
|
38 |
#' |
|
39 |
#' adsub <- data.frame( |
|
40 |
#' USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"), |
|
41 |
#' STUDYID = "My_study", |
|
42 |
#' PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"), |
|
43 |
#' PARAMCD = c("w", "w", "w", "w", "h", "h", "h"), |
|
44 |
#' AVAL = c(98, 75, 70, 71, 182, 155, 152), |
|
45 |
#' AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180") |
|
46 |
#' ) |
|
47 |
#' |
|
48 |
#' db <- list(adsl = adsl, adsub = adsub) |
|
49 |
#' |
|
50 |
#' x <- join_adsub_adsl(adam_db = db) |
|
51 |
#' x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h") |
|
52 |
join_adsub_adsl.list <- function(adam_db, |
|
53 |
keys = c("USUBJID", "STUDYID"), |
|
54 |
continuous_var = "all", |
|
55 |
categorical_var = "all", |
|
56 |
continuous_suffix = "", |
|
57 |
categorical_suffix = "_CAT") { |
|
58 | 5x |
checkmate::assert_list(adam_db, types = "data.frame") |
59 | 5x |
checkmate::assert_names(names(adam_db), must.include = c("adsl", "adsub")) |
60 | 5x |
checkmate::assert_names(names(adam_db$adsub), must.include = c("PARAM", "PARAMCD", "AVAL", "AVALC", keys)) |
61 | 5x |
checkmate::assert_names(names(adam_db$adsl), must.include = keys) |
62 | 5x |
checkmate::assert_string(continuous_suffix) |
63 | 5x |
checkmate::assert_string(categorical_suffix) |
64 | ||
65 | 5x |
value_col <- c("AVAL", "AVALC") |
66 | 5x |
vars_ls <- list(continuous_var, categorical_var) |
67 | 5x |
suffix_ls <- list(continuous_suffix, categorical_suffix) |
68 | ||
69 |
# Select variables names. |
|
70 | 5x |
vars_ls <- lapply(vars_ls, function(x) { |
71 | 10x |
if (identical(x, "all")) { |
72 | 6x |
unique(adam_db$adsub$PARAMCD) |
73 |
} else { |
|
74 | 4x |
x |
75 |
} |
|
76 |
}) |
|
77 | ||
78 |
# Create new variable names. |
|
79 | 5x |
vars_nam <- mapply( |
80 | 5x |
function(x, y) { |
81 | 10x |
if (!is.null(x)) { |
82 | 6x |
names(x) <- paste0(x, y) |
83 | 6x |
x |
84 |
} else { |
|
85 | 4x |
NULL |
86 |
} |
|
87 |
}, |
|
88 | 5x |
vars_ls, |
89 | 5x |
suffix_ls, |
90 | 5x |
SIMPLIFY = FALSE |
91 |
) |
|
92 | ||
93 |
# Test if new columns already exist in adsl. |
|
94 | 5x |
assert_names_notadsl(vars_nam, adam_db$adsl) |
95 | ||
96 |
# Test if categorical and continuous column will result in the same column name. |
|
97 | 5x |
assert_names_collision(vars_nam) |
98 | ||
99 |
# Pivot and keep labels. |
|
100 | 5x |
adsub_wide_ls <- |
101 | 5x |
adam_db$adsub %>% |
102 | 5x |
poly_pivot_wider(id = keys, param_from = "PARAMCD", value_from = value_col, labels_from = "PARAM") |
103 | ||
104 |
# Merge categorical and continuous variables. |
|
105 | 5x |
for (i in seq_along(value_col)) { |
106 | 10x |
adsub_df <- adsub_wide_ls[[value_col[i]]] |
107 | 10x |
adsub_df <- adsub_df[, c(keys, vars_nam[[i]])] |
108 | 10x |
colnames(adsub_df) <- c(keys, names(vars_nam[[i]])) |
109 | ||
110 | 10x |
adam_db$adsl <- dplyr::left_join( |
111 | 10x |
x = adam_db$adsl, |
112 | 10x |
y = adsub_df, |
113 | 10x |
by = keys |
114 |
) |
|
115 |
} |
|
116 | ||
117 | 5x |
adam_db |
118 |
} |
|
119 | ||
120 |
# Utility functions ---- |
|
121 | ||
122 |
assert_names_collision <- function(vars_nam) { |
|
123 | 5x |
final_names_ls <- lapply(vars_nam, names) |
124 | 5x |
in_both <- final_names_ls[[1]] %in% final_names_ls[[2]] |
125 | 5x |
if (any(in_both)) { |
126 | ! |
warning( |
127 | ! |
paste( |
128 | ! |
toString(final_names_ls[[1]][in_both]), |
129 | ! |
"are new columns for continuous and categorical variable, |
130 | ! |
Please set different `continuous_suffix` or `categorical_suffix` |
131 | ! |
or select different columns to avoid automatic renaming." |
132 |
) |
|
133 |
) |
|
134 |
} |
|
135 |
} |
|
136 | ||
137 |
assert_names_notadsl <- function(vars_nam, df) { |
|
138 | 5x |
final_names <- unique(sapply(vars_nam, names)) |
139 | 5x |
already_in_adsl <- final_names %in% colnames(df) |
140 | 5x |
if (any(already_in_adsl)) { |
141 | 1x |
warning( |
142 | 1x |
paste( |
143 | 1x |
toString(final_names[already_in_adsl]), |
144 | 1x |
"already exist in adsl, the name will default to another values. |
145 | 1x |
Please change `continuous_suffix` or `categorical_suffix` to avoid automatic reneaming" |
146 |
) |
|
147 |
) |
|
148 |
} |
|
149 |
} |
1 |
#' Filter Data with Log |
|
2 |
#' @param data (`data.frame`) input data to subset, or named (`list`) of (`data.frame`). |
|
3 |
#' @param condition (`call`) of subset condition. Must evaluate as logical. |
|
4 |
#' @param suffix (`string`) optional argument describing the filter. |
|
5 |
#' @param ... further arguments to be passed to or from other methods. |
|
6 |
#' |
|
7 |
#' @details |
|
8 |
#' `log_filter` will filter the data/named list of data according to the `condition`. |
|
9 |
#' All the variables in `condition` must exist in the data (as variables) or in the parent |
|
10 |
#' frame(e.g., in global environment). |
|
11 |
#' For named list of data, if `ADSL` is available, `log_filter` will also try to subset all |
|
12 |
#' other datasets with `USUBJID`. |
|
13 |
#' @export |
|
14 |
log_filter <- function(data, condition, ...) { |
|
15 | 40x |
UseMethod("log_filter") |
16 |
} |
|
17 | ||
18 |
#' @rdname log_filter |
|
19 |
#' @export |
|
20 |
#' @examples |
|
21 |
#' data <- iris |
|
22 |
#' attr(data$Sepal.Length, "label") <- "cm" |
|
23 |
#' log_filter(data, Sepal.Length >= 7) |
|
24 |
#' |
|
25 |
log_filter.data.frame <- function(data, condition, suffix = NULL, ...) { |
|
26 | 23x |
checkmate::assert_string(suffix, null.ok = TRUE) |
27 | ||
28 | 23x |
condition <- match.call()$condition |
29 | 23x |
vars <- all.vars(condition) |
30 | 23x |
var_in_env <- vapply(vars, exists, envir = parent.frame(), inherits = TRUE, FUN.VALUE = TRUE) |
31 | 23x |
var_in_data <- vapply(vars, `%in%`, table = names(data), FUN.VALUE = TRUE) |
32 | 23x |
if (!all(var_in_env | var_in_data)) { |
33 | 1x |
stop(sprintf("Variable %s not found in data or environment.", toString(vars[!(var_in_data | var_in_env)]))) |
34 |
} |
|
35 | 22x |
res <- eval(bquote(dplyr::filter(data, .(condition)))) |
36 | 22x |
rows <- list(list(init = nrow(data), final = nrow(res), suffix = suffix)) |
37 | 22x |
rlbl <- paste0(deparse(condition), collapse = "") |
38 | 22x |
rlbl <- stringr::str_replace_all(rlbl, "\\s+", " ") |
39 | 22x |
names(rows) <- rlbl |
40 | 22x |
attr(res, "rows") <- c(attr(data, "rows"), rows) |
41 | ||
42 | 22x |
res |
43 |
} |
|
44 | ||
45 |
#' @rdname log_filter |
|
46 |
#' @param table (`string`) table name. |
|
47 |
#' @param by (`character`) variable names shared by `adsl` and other datasets for filtering. |
|
48 |
#' @export |
|
49 |
#' @examples |
|
50 |
#' log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0)) |
|
51 |
log_filter.list <- function(data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, ...) { |
|
52 | 17x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
53 | 17x |
checkmate::assert_subset(table, names(data)) |
54 | 17x |
checkmate::assert_names(colnames(data[[table]]), must.include = by) |
55 | 17x |
condition <- match.call()$condition |
56 | 17x |
data[[table]] <- eval(bquote(log_filter(data[[table]], .(condition), .(suffix)))) |
57 | 17x |
if (identical(table, "adsl")) { |
58 | 8x |
for (k in setdiff(names(data), "adsl")) { |
59 | 8x |
if (all(by %in% names(data[[k]]))) { |
60 | 1x |
if (length(by) == 0) by <- intersect(names(data[[k]]), names(data$adsl)) |
61 | ||
62 | 8x |
ori_n <- nrow(data[[k]]) |
63 | 8x |
ori_att <- attr(data[[k]], "rows") |
64 | ||
65 | 8x |
data[[k]] <- dplyr::semi_join(data[[k]], data$adsl, by = by) |
66 | ||
67 | 8x |
rows <- list(list(init = ori_n, final = nrow(data[[k]]), suffix = suffix)) |
68 | 8x |
names(rows) <- paste0("Filtered by adsl: ", deparse(condition), collapse = "") |
69 | 8x |
attr(data[[k]], "rows") <- c(ori_att, rows) |
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 | 17x |
return(data) |
74 |
} |
|
75 | ||
76 |
# Get Log ---- |
|
77 | ||
78 |
#' Get Log |
|
79 |
#' |
|
80 |
#' @param data (`list` of `data.frame` or `data.frame`) filtered with `log_filter`. |
|
81 |
#' @param incl (`flag`) should information about unfiltered `data.frame` be printed. |
|
82 |
#' @param incl.adsl (`flag`) should indication of filtering performed through `adsl` be printed. |
|
83 |
#' |
|
84 |
#' @export |
|
85 |
get_log <- function(data, incl, incl.adsl) { |
|
86 | 39x |
UseMethod("get_log") |
87 |
} |
|
88 | ||
89 |
#' @rdname get_log |
|
90 |
#' @export |
|
91 |
#' @examples |
|
92 |
#' data <- log_filter(iris, Sepal.Length >= 7, "xx") |
|
93 |
#' data <- log_filter(data, Sepal.Length < 2) |
|
94 |
#' data <- log_filter(data, Sepal.Length >= 2, "yy") |
|
95 |
#' get_log(data) |
|
96 |
#' |
|
97 |
get_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
98 | 27x |
checkmate::assert_flag(incl) |
99 | ||
100 | 27x |
att <- attr(data, "rows") |
101 | 27x |
if (!incl.adsl) { |
102 | 4x |
sel <- grepl("Filtered by adsl", names(att)) |
103 | 4x |
att <- att[!sel] |
104 |
} |
|
105 | ||
106 | 27x |
if (length(att) != 0L) { |
107 | 17x |
start_row <- lapply(att, "[[", "init") |
108 | 17x |
end_row <- lapply(att, "[[", "final") |
109 | 17x |
suffix <- lapply(att, "[[", "suffix") |
110 | 17x |
suffix <- vapply(suffix, function(x) ifelse(is.null(x), "", paste0(x, ": ")), character(1)) |
111 | 17x |
res <- paste0(suffix, names(att), " [", start_row, " --> ", end_row, " rows.]") |
112 | 10x |
} else if (incl) { |
113 | 6x |
paste0("No filtering [", nrow(data), " rows.]") |
114 |
} else { |
|
115 | 4x |
NULL |
116 |
} |
|
117 |
} |
|
118 | ||
119 | ||
120 |
#' @rdname get_log |
|
121 |
#' @export |
|
122 |
#' @examples |
|
123 |
#' data <- log_filter( |
|
124 |
#' list(iris1 = iris, iris2 = iris), |
|
125 |
#' Sepal.Length >= 7, |
|
126 |
#' "iris1", |
|
127 |
#' character(0), |
|
128 |
#' "Sep" |
|
129 |
#' ) |
|
130 |
#' get_log(data) |
|
131 |
#' |
|
132 |
get_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
133 | 12x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
134 | 12x |
checkmate::assert_flag(incl) |
135 | ||
136 | 12x |
lapply(data, get_log, incl = incl, incl.adsl = incl.adsl) |
137 |
} |
|
138 | ||
139 |
# Print Log ---- |
|
140 | ||
141 |
#' Print Log |
|
142 |
#' |
|
143 |
#' @inheritParams get_log |
|
144 |
#' @export |
|
145 |
#' |
|
146 |
print_log <- function(data, incl, incl.adsl) { |
|
147 | 10x |
UseMethod("print_log") |
148 |
} |
|
149 | ||
150 |
#' @rdname print_log |
|
151 |
#' @export |
|
152 |
#' @examples |
|
153 |
#' data <- log_filter(iris, Sepal.Length >= 7, "Sep") |
|
154 |
#' print_log(data) |
|
155 |
print_log.data.frame <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
156 | 3x |
checkmate::assert_flag(incl) |
157 | ||
158 | 3x |
cat("Filter Log:") |
159 | 3x |
cat(paste0("\n ", get_log(data, incl = incl, incl.adsl = incl.adsl))) |
160 | 3x |
cat("\n") |
161 | 3x |
invisible() |
162 |
} |
|
163 | ||
164 |
#' @rdname print_log |
|
165 |
#' @export |
|
166 |
#' @examples |
|
167 |
#' data <- log_filter( |
|
168 |
#' list( |
|
169 |
#' adsl = iris, |
|
170 |
#' iris2 = iris, |
|
171 |
#' mtcars = mtcars, |
|
172 |
#' iris3 = iris |
|
173 |
#' ), |
|
174 |
#' Sepal.Length >= 7, |
|
175 |
#' "adsl", |
|
176 |
#' character(0), |
|
177 |
#' "adsl filter" |
|
178 |
#' ) |
|
179 |
#' data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter") |
|
180 |
#' print_log(data) |
|
181 |
#' print_log(data, incl = FALSE) |
|
182 |
#' print_log(data, incl.adsl = FALSE, incl = FALSE) |
|
183 |
print_log.list <- function(data, incl = TRUE, incl.adsl = TRUE) { |
|
184 | 7x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
185 | 7x |
checkmate::assert_flag(incl) |
186 | ||
187 | 7x |
filter_log <- get_log(data, incl = incl, incl.adsl = incl.adsl) |
188 | ||
189 | 7x |
if (!incl) { |
190 | 2x |
filter_log <- filter_log[!vapply(filter_log, is.null, logical(1))] |
191 |
} |
|
192 | ||
193 | 7x |
cat("Filter Log:") |
194 | 7x |
if (length(filter_log) == 0) { |
195 | 1x |
cat("\n No filtering") |
196 |
} else { |
|
197 | 6x |
mapply( |
198 | 6x |
function(x, y) { |
199 | 11x |
cat(paste0("\n - ", x, ":")) |
200 | 11x |
cat(paste0("\n ", y, "")) |
201 |
}, |
|
202 | 6x |
as.list(names(filter_log)), |
203 | 6x |
filter_log |
204 |
) |
|
205 |
} |
|
206 | 7x |
cat("\n") |
207 | ||
208 | 7x |
invisible() |
209 |
} |
1 |
#' Reformat Values |
|
2 |
#' @param obj object to reformat. |
|
3 |
#' @param format (`rule`) or (`list`) of `rule` depending on the class of obj. |
|
4 |
#' @param ... for compatibility between methods and pass additional special mapping to transform rules. |
|
5 |
#' * `.string_as_fct` (`flag`) whether the reformatted character object should be converted to factor. |
|
6 |
#' * `.to_NA` (`character`) values that should be converted to `NA`. For `factor`, the corresponding levels are |
|
7 |
#' dropped. If `NULL`, the argument will be taken from the `to_NA`attribute of the rule. |
|
8 |
#' * `.drop` (`flag`) whether to drop empty levels. If `NULL`, the argument will be taken from the `drop`attribute of |
|
9 |
#' the rule. |
|
10 |
#' * `.na_last` (`flag`) whether the level replacing `NA` should be last. |
|
11 |
#' |
|
12 |
#' @export |
|
13 |
#' @note When the rule is empty rule or when values subject to reformatting are absent from the object, no error is |
|
14 |
#' raised. The conversion to factor if `.string_as_fct = TRUE`) is still carried out. The conversion of the levels |
|
15 |
#' declared in `.to_NA` to `NA` values occurs after the remapping. `NA` values created this way are not affected by a |
|
16 |
#' rule declaring a remapping of `NA` values. For factors, level dropping is the last step, hence, levels converted to |
|
17 |
#' `NA` by the `.to_NA` argument, will be removed if `.drop` is `TRUE`. Arguments passed via `reformat` override the |
|
18 |
#' ones defined during rule creation. |
|
19 |
#' |
|
20 |
#' @rdname reformat |
|
21 |
#' |
|
22 |
reformat <- function(obj, ...) { |
|
23 | 51x |
UseMethod("reformat") |
24 |
} |
|
25 | ||
26 |
#' @export |
|
27 |
#' @rdname reformat |
|
28 |
reformat.default <- function(obj, format, ...) { |
|
29 | 2x |
if (!is(format, "empty_rule")) { |
30 | 1x |
warning(paste0(c("Not implemented for class: ", toString(class(obj)), "! Only empty rule allowed."))) |
31 |
} |
|
32 | 2x |
return(obj) |
33 |
} |
|
34 | ||
35 |
#' @export |
|
36 |
#' @rdname reformat |
|
37 |
#' |
|
38 |
#' @examples |
|
39 |
#' |
|
40 |
#' # Reformatting of character. |
|
41 |
#' obj <- c("a", "b", "x", NA) |
|
42 |
#' attr(obj, "label") <- "my label" |
|
43 |
#' format <- rule("A" = "a", "NN" = NA) |
|
44 |
#' |
|
45 |
#' reformat(obj, format) |
|
46 |
#' reformat(obj, format, .string_as_fct = FALSE, .to_NA = "x") |
|
47 |
#' reformat(obj, empty_rule, .string_as_fct = FALSE, .to_NA = "x") |
|
48 |
#' |
|
49 |
reformat.character <- function(obj, format, ...) { |
|
50 | 19x |
checkmate::assert_class(format, "rule") |
51 | ||
52 |
# Give priority to argument defined in reformat. |
|
53 | 19x |
format <- do.call(rule, modifyList(as.list(format), list(...))) |
54 | ||
55 | 19x |
if (attr(format, ".string_as_fct")) { |
56 |
# Keep attributes. |
|
57 | 11x |
att <- attributes(obj) |
58 | 11x |
obj_fact <- as.factor(obj) |
59 | 11x |
supp_att_name <- setdiff(names(att), attributes(obj_fact)) |
60 | 11x |
supp_att <- att[supp_att_name] |
61 | 11x |
attributes(obj_fact) <- c(attributes(obj_fact), supp_att) |
62 | ||
63 | 11x |
reformat(obj_fact, format) |
64 |
} else { |
|
65 | 8x |
if (!is(format, "empty_rule")) { |
66 | 6x |
value_match <- unlist(format) |
67 | 6x |
m <- match(obj, value_match) |
68 | 6x |
obj[!is.na(m)] <- names(format)[m[!is.na(m)]] |
69 |
} |
|
70 | ||
71 | ||
72 | 8x |
val_to_NA <- attr(format, ".to_NA") |
73 | 8x |
if (!is.null(val_to_NA)) { |
74 | 4x |
obj[obj %in% val_to_NA] <- NA_character_ |
75 |
} |
|
76 | ||
77 | 8x |
obj |
78 |
} |
|
79 |
} |
|
80 | ||
81 |
#' @export |
|
82 |
#' @rdname reformat |
|
83 |
#' |
|
84 |
#' @examples |
|
85 |
#' |
|
86 |
#' # Reformatting of factor. |
|
87 |
#' obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z")) |
|
88 |
#' attr(obj, "label") <- "my label" |
|
89 |
#' format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P") |
|
90 |
#' |
|
91 |
#' reformat(obj, format) |
|
92 |
#' reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE) |
|
93 |
#' reformat(obj, empty_rule, .na_last = FALSE, .to_NA = "b", .drop = FALSE) |
|
94 |
#' |
|
95 |
reformat.factor <- function(obj, format, ...) { |
|
96 | 27x |
checkmate::assert_class(format, "rule") |
97 | ||
98 | 27x |
format <- do.call(rule, modifyList(as.list(format), list(...))) |
99 | ||
100 | 27x |
if (!is(format, "empty_rule")) { |
101 | 20x |
any_na <- anyNA(obj) |
102 | 20x |
if (any(is.na(format)) && any_na) { |
103 | 18x |
obj <- forcats::fct_na_value_to_level(obj) |
104 |
} |
|
105 | ||
106 | 20x |
absent_format <- format[!format %in% levels(obj)] |
107 | 20x |
sel_format <- format[format %in% levels(obj)] |
108 | 20x |
obj <- forcats::fct_recode(obj, !!!sel_format) |
109 | 20x |
obj <- forcats::fct_expand(obj, unique(names(absent_format))) |
110 | 20x |
obj <- forcats::fct_relevel(obj, unique(names(format))) |
111 | ||
112 | 20x |
if (any(is.na(format)) && attr(format, ".na_last")) { |
113 | 13x |
na_lvl <- names(format)[is.na(format)] |
114 | 13x |
obj <- forcats::fct_relevel(obj, na_lvl, after = Inf) |
115 |
} |
|
116 |
} |
|
117 | ||
118 | 27x |
drop_lvl <- attr(format, ".drop") |
119 | 27x |
if (drop_lvl) { |
120 | 3x |
obj <- forcats::fct_drop(obj) |
121 |
} |
|
122 | ||
123 |
# Levels converted to NA are dropped. |
|
124 | 27x |
val_to_NA <- attr(format, ".to_NA") |
125 | 27x |
if (!is.null(val_to_NA)) { |
126 | 4x |
obj <- forcats::fct_na_level_to_value(obj, val_to_NA) |
127 |
} |
|
128 | ||
129 | 27x |
obj |
130 |
} |
|
131 | ||
132 |
#' @export |
|
133 |
#' @rdname reformat |
|
134 |
#' |
|
135 |
#' @examples |
|
136 |
#' |
|
137 |
#' # Reformatting of list of data.frame. |
|
138 |
#' df1 <- data.frame( |
|
139 |
#' var1 = c("a", "b", NA), |
|
140 |
#' var2 = factor(c("F1", "F2", NA)) |
|
141 |
#' ) |
|
142 |
#' |
|
143 |
#' df2 <- data.frame( |
|
144 |
#' var1 = c("x", NA, "y"), |
|
145 |
#' var2 = factor(c("F11", NA, "F22")) |
|
146 |
#' ) |
|
147 |
#' |
|
148 |
#' db <- list(df1 = df1, df2 = df2) |
|
149 |
#' |
|
150 |
#' format <- list( |
|
151 |
#' df1 = list( |
|
152 |
#' var1 = rule("X" = "x", "N" = NA, .to_NA = "b") |
|
153 |
#' ), |
|
154 |
#' df2 = list( |
|
155 |
#' var1 = empty_rule, |
|
156 |
#' var2 = rule("f11" = "F11", "NN" = NA) |
|
157 |
#' ) |
|
158 |
#' ) |
|
159 |
#' |
|
160 |
#' reformat(db, format) |
|
161 |
reformat.list <- function(obj, format, ...) { |
|
162 | 3x |
checkmate::assert_list(obj, types = c("data.frame", "tibble")) |
163 | 3x |
checkmate::assert_named(obj) |
164 | 3x |
checkmate::assert_list(format, names = "unique", types = "list", null.ok = TRUE) |
165 | ||
166 | 3x |
if (length(format) == 0) { |
167 | 1x |
return(obj) |
168 |
} |
|
169 | ||
170 | 2x |
assert_valid_format(format) |
171 | ||
172 | 2x |
for (tab in names(format)) { |
173 | 3x |
local_map <- format[[tab]] |
174 | 3x |
local_map <- local_map[names(local_map) %in% names(obj[[tab]])] |
175 | ||
176 | 3x |
obj[[tab]][names(local_map)] <- mapply( |
177 | 3x |
function(rl, col) reformat(obj[[tab]][[col]], format = rl, ...), |
178 | 3x |
local_map, |
179 | 3x |
names(local_map), |
180 | 3x |
SIMPLIFY = FALSE |
181 |
) |
|
182 |
} |
|
183 | ||
184 | 2x |
obj |
185 |
} |
1 |
#' Create rule based on mappings |
|
2 |
#' @param ... Mapping pairs, the argument name is the transformed while |
|
3 |
#' its values are original values. |
|
4 |
#' @param .lst (`list`) of mapping. |
|
5 |
#' @param .string_as_fct (`flag`) whether to convert characters to factors. |
|
6 |
#' @param .na_last (`flag`) whether the level replacing `NA` should be last. |
|
7 |
#' @param .drop (`flag`) whether to drop empty levels. |
|
8 |
#' @param .to_NA (`character`) values that should be converted to `NA`. |
|
9 |
#' |
|
10 |
#' @note Conversion to `NA` is the last step of the remapping process. |
|
11 | ||
12 |
#' |
|
13 |
#' @export |
|
14 |
#' @examples |
|
15 |
#' rule("X" = "x", "Y" = c("y", "z")) |
|
16 |
#' rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE) |
|
17 |
#' |
|
18 |
rule <- function(..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = NULL) { |
|
19 | 97x |
checkmate::assert_flag(.string_as_fct) |
20 | 97x |
checkmate::assert_flag(.na_last) |
21 | 97x |
checkmate::assert_flag(.drop) |
22 | 97x |
checkmate::assert_character(.to_NA, null.ok = TRUE, any.missing = FALSE) |
23 | ||
24 | 97x |
if (length(.lst) == 0) { |
25 | 23x |
res <- empty_rule |
26 | 23x |
attr(res, ".string_as_fct") <- .string_as_fct %||% TRUE |
27 | 23x |
attr(res, ".drop") <- .drop %||% FALSE |
28 | 23x |
attr(res, ".to_NA") <- .to_NA %||% NULL |
29 | 23x |
return(res) |
30 |
} else { |
|
31 | 74x |
.lst[is.na(.lst)] <- NA_character_ |
32 | 74x |
if (!checkmate::test_list(.lst, types = c("character"))) { |
33 | 1x |
stop("Value mapping may only contain the type: {character}") |
34 |
} |
|
35 | 73x |
vals <- as.character(unlist(.lst, use.names = FALSE)) |
36 | 73x |
checkmate::assert_character(vals, unique = TRUE) |
37 | 71x |
nms <- unlist(lapply(seq_len(length(.lst)), function(x) { |
38 | 166x |
rep(names(.lst)[x], length(.lst[[x]])) |
39 |
})) |
|
40 | ||
41 | 71x |
res <- structure( |
42 | 71x |
setNames(vals, nms), |
43 | 71x |
class = c("rule", "character"), |
44 | 71x |
.string_as_fct = .string_as_fct, |
45 | 71x |
.na_last = .na_last, |
46 | 71x |
.drop = .drop, |
47 | 71x |
.to_NA = .to_NA |
48 |
) |
|
49 | ||
50 | 71x |
res |
51 |
} |
|
52 |
} |
|
53 | ||
54 |
#' Create empty rule |
|
55 |
#' @export |
|
56 |
empty_rule <- structure( |
|
57 |
character(0L), |
|
58 |
class = c("empty_rule", "rule", "character"), |
|
59 |
.string_as_fct = TRUE, |
|
60 |
.na_last = FALSE, |
|
61 |
.drop = FALSE |
|
62 |
) |
|
63 | ||
64 |
#' @export |
|
65 |
#' |
|
66 |
print.rule <- function(x, ...) { |
|
67 | 1x |
cat("Mapping of:\n") |
68 | 1x |
nms <- names(x) |
69 | 1x |
for (i in seq_len(length(x))) { |
70 | 2x |
cat(nms[i], " <- ", if (length(x[[i]]) > 1) sprintf("[%s]", toString(x[[i]])) else x[[i]], "\n") |
71 |
} |
|
72 | ! |
if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n") |
73 | 1x |
cat("Convert to factor:", attr(x, ".string_as_fct"), "\n") |
74 | 1x |
cat("Drop unused level:", attr(x, ".drop"), "\n") |
75 | 1x |
cat("NA-replacing level in last position:", attr(x, ".na_last"), "\n") |
76 |
} |
|
77 | ||
78 |
#' Convert nested list into list of `rule` |
|
79 |
#' @param obj (`nested list`) to convert into list of rules. |
|
80 |
#' @export |
|
81 |
#' @examples |
|
82 |
#' obj <- list( |
|
83 |
#' rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"), |
|
84 |
#' rule2 = list(Missing = c(NA, "")), |
|
85 |
#' rule3 = list(Missing = c(NA, ""), .drop = TRUE), |
|
86 |
#' rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy") |
|
87 |
#' ) |
|
88 |
#' list2rules(obj) |
|
89 |
#' |
|
90 |
list2rules <- function(obj) { |
|
91 | 2x |
coll <- checkmate::makeAssertCollection() |
92 | 2x |
checkmate::assert_list(obj, unique = TRUE, types = "list", add = coll) |
93 | 2x |
checkmate::assert_names(names(obj), type = "unique", add = coll) |
94 | 2x |
checkmate::reportAssertions(coll) |
95 | ||
96 | 1x |
lapply(obj, function(x) { |
97 | 3x |
do.call("rule", x) |
98 |
}) |
|
99 |
} |
|
100 | ||
101 |
#' Convert Rule to List |
|
102 |
#' @param x (`rule`) to convert. |
|
103 |
#' @param ... not used. |
|
104 |
#' |
|
105 |
#' @export |
|
106 |
#' @examples |
|
107 |
#' |
|
108 |
#' x <- rule("a" = c("a", "b"), "X" = "x") |
|
109 |
#' as.list(x) |
|
110 |
as.list.rule <- function(x, ...) { |
|
111 | 48x |
nms <- names(x) |
112 | 48x |
unames <- unique(nms) |
113 | 48x |
res <- lapply(unames, function(i) { |
114 | 80x |
unname(x[nms == i]) |
115 |
}) |
|
116 | ||
117 | ||
118 | 48x |
att <- attributes(x) |
119 | 48x |
arg <- att[!names(att) %in% c("names", "class")] |
120 | ||
121 | 48x |
res <- c(res, unname(arg)) |
122 | 48x |
unames <- c(unames, names(arg)) |
123 | ||
124 | 48x |
setNames(res, unames) |
125 |
} |
|
126 | ||
127 |
#' @export |
|
128 |
print.empty_rule <- function(x, ...) { |
|
129 | 2x |
cat("Empty mapping\n") |
130 | ! |
if (!is.null(attr(x, ".to_NA"))) cat("NA <- ", toString(attr(x, ".to_NA")), "\n") |
131 | 2x |
cat("Convert to factor:", attr(x, ".string_as_fct"), "\n") |
132 | 2x |
cat("Drop unused level:", attr(x, ".drop"), "\n") |
133 |
} |
1 |
#' Propagate Column |
|
2 |
#' |
|
3 |
#' `propagate`copy columns from a given table of a `list` of `data.frame` to all tables based on other |
|
4 |
#' common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving |
|
5 |
#' tables. In safe mode, the key must be unique in the original table. |
|
6 |
#' |
|
7 |
#' @param db (`list` of `data.frame`) object for which some variable need to be propagated. |
|
8 |
#' @param from (`string`) the name of the table where the variables to propagate are stored. |
|
9 |
#' @param add (`character`) the names of the variables to propagate. |
|
10 |
#' @param by (`character`) the key binding the `from` table to the other tables. |
|
11 |
#' @param safe (`flag`) should the key be checked for uniqueness in the `from` table. |
|
12 |
#' |
|
13 |
#' @return updated `list` of `data.frame`. |
|
14 |
#' |
|
15 |
#' @rdname propagate |
|
16 |
#' @export |
|
17 |
#' |
|
18 |
propagate <- function(db, from, add, by, safe = TRUE) { |
|
19 | 3x |
UseMethod("propagate") |
20 |
} |
|
21 | ||
22 |
#' @rdname propagate |
|
23 |
#' @export |
|
24 |
#' |
|
25 |
#' |
|
26 |
#' @examples |
|
27 |
#' df1 <- data.frame( |
|
28 |
#' id1 = c("a", "a", "c", "d", "e", "f"), |
|
29 |
#' id2 = c("A", "B", "A", "A", "A", "A"), |
|
30 |
#' int = c(1, 2, 3, 4, 5, 6), |
|
31 |
#' bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE) |
|
32 |
#' ) |
|
33 |
#' |
|
34 |
#' df2 <- data.frame( |
|
35 |
#' id1 = c("a", "a", "d", "e", "f", "g"), |
|
36 |
#' id2 = c("A", "B", "A", "A", "A", "A") |
|
37 |
#' ) |
|
38 |
#' |
|
39 |
#' df3 <- data.frame( |
|
40 |
#' id1 = c("a", "c", "d", "e", "f", "x"), |
|
41 |
#' id2 = c("A", "A", "A", "A", "B", "A"), |
|
42 |
#' int = c(11, 22, 33, 44, 55, 66) |
|
43 |
#' ) |
|
44 |
#' |
|
45 |
#' db <- list(df1 = df1, fd2 = df2, df3 = df3) |
|
46 |
#' propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2")) |
|
47 |
#' |
|
48 |
propagate.list <- function(db, from, add, by, safe = TRUE) { |
|
49 | 3x |
checkmate::assert_list(db, types = "data.frame", names = "unique") |
50 | 3x |
checkmate::assert_names(names(db), must.include = from) |
51 | 3x |
checkmate::assert_names(colnames(db[[from]]), must.include = add) |
52 | 3x |
checkmate::assert_names(colnames(db[[from]]), must.include = by) |
53 | 3x |
checkmate::assert_flag(safe) |
54 | ||
55 | 3x |
if (safe) { |
56 | 2x |
keys <- db[[from]][, by] |
57 | 1x |
if (anyDuplicated(keys)) stop(paste("Duplicated key")) |
58 |
} |
|
59 | ||
60 | 2x |
toJoin <- db[[from]] |
61 | ||
62 | 2x |
for (tab_name in setdiff(names(db), from)) { |
63 | 3x |
tab_colnames <- colnames(db[[tab_name]]) |
64 | 3x |
if (!all(add %in% tab_colnames) && all(by %in% tab_colnames)) { |
65 | 2x |
missing_var <- setdiff(add, tab_colnames) |
66 | 2x |
sel_var <- c(missing_var, by) |
67 | 2x |
sel_tab <- toJoin[, sel_var] |
68 | ||
69 | 2x |
cat(paste0("\nUpdating: ", tab_name, " with: ", toString(missing_var))) |
70 | ||
71 | 2x |
db[[tab_name]] <- db[[tab_name]] %>% |
72 | 2x |
dplyr::left_join(sel_tab, by = by) |
73 |
} else { |
|
74 | 1x |
cat(paste0("\nSkipping: ", tab_name)) |
75 |
} |
|
76 |
} |
|
77 | 2x |
cat("\n") |
78 | 2x |
return(db) |
79 |
} |
1 |
#' Safe transformer |
|
2 |
#' @details Obtain content in global environment by default. |
|
3 |
#' If not found, use the environment here. |
|
4 |
#' @keywords internal |
|
5 |
safe_transformer <- function(text, envir) { |
|
6 | 7x |
text_lower <- tolower(text) |
7 | 7x |
res <- if (exists(text_lower, envir = envir, inherits = FALSE)) { |
8 | 6x |
get(text_lower, envir = envir) |
9 |
} else { |
|
10 | 1x |
text |
11 |
} |
|
12 | 7x |
if (is.character(res)) { |
13 | 7x |
if (identical(text, tolower(text))) { |
14 | 3x |
res <- tolower(res) |
15 | 4x |
} else if (identical(text, toupper(text))) { |
16 | 2x |
res <- toupper(res) |
17 | 2x |
} else if (identical(text, stringr::str_to_title(text))) { |
18 | 2x |
res <- stringr::str_to_title(res) |
19 |
} |
|
20 |
} |
|
21 | 7x |
res |
22 |
} |
|
23 | ||
24 |
#' Render whiskers safely |
|
25 |
#' @param x (`character`) input to be rendered safely. |
|
26 |
#' @export |
|
27 |
render_safe <- function(x) { |
|
28 | 4x |
checkmate::assert_character(x, null.ok = TRUE) |
29 | 4x |
if (is.null(x)) { |
30 | ! |
return(NULL) |
31 |
} |
|
32 | 4x |
ret <- lapply( |
33 | 4x |
x, |
34 | 4x |
glue::glue, |
35 | 4x |
.transformer = safe_transformer, |
36 | 4x |
.envir = whisker_env, |
37 | 4x |
.null = "NULL", |
38 | 4x |
.open = "{", |
39 | 4x |
.close = "}" |
40 |
) |
|
41 | 4x |
ret <- vapply(ret, `[[`, i = 1L, FUN.VALUE = "") |
42 | 4x |
setNames(ret, names(x)) |
43 |
} |
|
44 |
#' Add whisker values |
|
45 |
#' @param x Named (`character`) input. |
|
46 |
#' @export |
|
47 |
add_whisker <- function(x) { |
|
48 | 1x |
checkmate::assert_character(x, names = "unique", any.missing = FALSE) |
49 | 1x |
lapply( |
50 | 1x |
names(x), |
51 | 1x |
function(i) { |
52 | 2x |
assign(i, x[i], envir = whisker_env) |
53 |
} |
|
54 |
) |
|
55 | 1x |
invisible() |
56 |
} |
|
57 | ||
58 |
#' Remove whisker values |
|
59 |
#' @param x Named (`character`) input. |
|
60 |
#' @export |
|
61 |
remove_whisker <- function(x) { |
|
62 | 1x |
checkmate::assert_character(x, any.missing = FALSE) |
63 | 1x |
rm(list = x, envir = whisker_env) |
64 |
} |
1 |
#' Unite Columns of a Table in a `list` of `data.frame`. |
|
2 |
#' |
|
3 |
#' @param adam_db (`list` of `data.frames`) to be transformed. |
|
4 |
#' @param tab (`string`) the name of a table in the `adam_db` object. |
|
5 |
#' @param cols (`character`) the name of the columns to unite. |
|
6 |
#' @param sep (`string`) the separator for the new column name. |
|
7 |
#' @param new (`string`) the name of the new column. If `NULL` the concatenation of `cols` separated by `sep` is used. |
|
8 |
#' |
|
9 |
#' @return `list` of `data.frames` object with a united column. |
|
10 |
#' @export |
|
11 |
#' |
|
12 |
#' @examples |
|
13 |
#' db <- list(mtcars = mtcars, iris = iris) |
|
14 |
#' |
|
15 |
#' x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION") |
|
16 |
#' x$mtcars |
|
17 |
ls_unite <- function(adam_db, tab, cols, sep = ".", new = NULL) { |
|
18 | 4x |
checkmate::assert_list(adam_db, types = "data.frame") |
19 | 4x |
checkmate::assert_string(tab) |
20 | 4x |
checkmate::assert_names(names(adam_db), must.include = tab) |
21 | 4x |
checkmate::assert_character(cols, min.len = 1) |
22 | 4x |
checkmate::assert_names(names(adam_db[[tab]]), must.include = cols) |
23 | 4x |
checkmate::assert_string(sep) |
24 | 4x |
checkmate::assert_string(new, null.ok = TRUE) |
25 | ||
26 | 4x |
x_interaction <- if (!is.null(new)) { |
27 | 3x |
new |
28 |
} else { |
|
29 | 1x |
paste(cols, collapse = sep) |
30 |
} |
|
31 | ||
32 | 4x |
x_df <- adam_db[[tab]][, cols, drop = FALSE] |
33 | 4x |
lvl <- lapply(x_df, function(y) { |
34 | 9x |
uni <- if (is.factor(y)) { |
35 | 7x |
levels(y) |
36 |
} else { |
|
37 | 2x |
unique(y) |
38 |
} |
|
39 | 9x |
factor(uni, levels = uni) |
40 |
}) |
|
41 | ||
42 | 4x |
all_lvl_df <- expand.grid(lvl) |
43 | ||
44 | 4x |
all_lvl <- all_lvl_df[, cols, drop = FALSE] %>% |
45 | 4x |
arrange(across(all_of(cols))) %>% |
46 | 4x |
apply(1, paste, collapse = sep) |
47 | ||
48 | 4x |
x_vec <- x_df[, cols, drop = FALSE] %>% |
49 | 4x |
apply(1, paste, collapse = sep) |
50 | ||
51 | 4x |
existing_lvl <- intersect(all_lvl, x_vec) |
52 | 4x |
x_fact <- factor(x_vec, existing_lvl) |
53 | ||
54 | 4x |
adam_db[[tab]][, x_interaction] <- x_fact |
55 | 4x |
adam_db |
56 |
} |
1 |
#' Cutting data by group |
|
2 |
#' |
|
3 |
#' @details Function used to categorize numeric data stored in long format depending on their group. Intervals are |
|
4 |
#' closed on the right (and open on the left). |
|
5 |
#' |
|
6 |
#' @param df (`dataframe`) with a column of data to be cut and a column specifying the group of each observation. |
|
7 |
#' @param col_data (`character`) the column containing the data to be cut. |
|
8 |
#' @param col_group (`character`) the column containing the names of the groups according to which the data should be |
|
9 |
#' split. |
|
10 |
#' @param group (`nested list`) providing for each parameter value that should be analyzed in a categorical way: the |
|
11 |
#' name of the parameter (`character`), a series of breakpoints (`numeric`) where the first breakpoints is typically |
|
12 |
#' `-Inf` and the last `Inf`, and a series of name which will describe each category (`character`). |
|
13 |
#' @param cat_col (`character`) the name of the new column in which the cut label should he stored. |
|
14 |
#' |
|
15 |
#' @export |
|
16 |
#' |
|
17 |
#' @return `data.frame` with a column containing categorical values. |
|
18 |
#' @examples |
|
19 |
#' group <- list( |
|
20 |
#' list( |
|
21 |
#' "Height", |
|
22 |
#' c(-Inf, 150, 170, Inf), |
|
23 |
#' c("=<150", "150-170", ">170") |
|
24 |
#' ), |
|
25 |
#' list( |
|
26 |
#' "Weight", |
|
27 |
#' c(-Inf, 65, Inf), |
|
28 |
#' c("=<65", ">65") |
|
29 |
#' ), |
|
30 |
#' list( |
|
31 |
#' "Age", |
|
32 |
#' c(-Inf, 31, Inf), |
|
33 |
#' c("=<31", ">31") |
|
34 |
#' ), |
|
35 |
#' list( |
|
36 |
#' "PreCondition", |
|
37 |
#' c(-Inf, 1, Inf), |
|
38 |
#' c("=<1", "<1") |
|
39 |
#' ) |
|
40 |
#' ) |
|
41 |
#' data <- data.frame( |
|
42 |
#' SUBJECT = rep(letters[1:10], 4), |
|
43 |
#' PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10), |
|
44 |
#' AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)), |
|
45 |
#' index = 1:40 |
|
46 |
#' ) |
|
47 |
#' |
|
48 |
#' cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories") |
|
49 |
cut_by_group <- function(df, |
|
50 |
col_data, |
|
51 |
col_group, |
|
52 |
group, |
|
53 |
cat_col) { |
|
54 | 4x |
checkmate::assert_data_frame(df) |
55 | 4x |
checkmate::assert_subset(c(col_data, col_group), colnames(df)) |
56 | 4x |
checkmate::assert_numeric(df[, col_data]) |
57 | 4x |
checkmate::assert_list(group) |
58 | ||
59 | 4x |
lapply( |
60 | 4x |
group, |
61 | 4x |
function(list_element) { |
62 | 11x |
checkmate::assert_list(list_element, len = 3, types = c("character", "numeric", "character")) |
63 |
} |
|
64 |
) |
|
65 | ||
66 | 4x |
df[cat_col] <- NA |
67 | ||
68 | 4x |
for (g in group) { |
69 | 10x |
selected_row <- df[[col_group]] == g[[1]] |
70 | ||
71 | 10x |
df[selected_row, cat_col] <- as.character(cut(df[[col_data]][selected_row], breaks = g[[2]], labels = g[[3]])) |
72 |
} |
|
73 | 3x |
df |
74 |
} |
1 |
#' Reorder Two Columns Levels Simultaneously |
|
2 |
#' |
|
3 |
#' @details The function expect a 1:1 matching between the elements of the two selected column. |
|
4 |
#' |
|
5 |
#' @param df (`data.frame`) with two column whose factors should be reordered. |
|
6 |
#' @param primary (`string`) the name of the column on which the levels reordering should be based. |
|
7 |
#' @param secondary (`string`) the name of the column whose levels should be reordered following the levels of the |
|
8 |
#' primary column. |
|
9 |
#' @param levels_primary (`character`) the levels in the desired order. Existing levels that are not included will be |
|
10 |
#' placed afterward in their current order. |
|
11 |
#' |
|
12 |
#' @export |
|
13 |
#' |
|
14 |
#' @examples |
|
15 |
#' |
|
16 |
#' df <- data.frame( |
|
17 |
#' SUBJID = 1:3, |
|
18 |
#' PARAMCD = factor(c("A", "B", "C")), |
|
19 |
#' PARAM = factor(paste("letter", LETTERS[1:3])) |
|
20 |
#' ) |
|
21 |
#' co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B")) |
|
22 |
co_relevels <- function(df, primary, secondary, levels_primary) { |
|
23 | 6x |
checkmate::assert_data_frame(df, min.rows = 1) |
24 | 6x |
checkmate::assert_subset(c(primary, secondary), colnames(df)) |
25 | 6x |
checkmate::assert_character(levels_primary, min.len = 1) |
26 | 6x |
checkmate::assert_vector(df[[primary]], any.missing = FALSE) |
27 | 5x |
checkmate::assert_vector(df[[secondary]], any.missing = FALSE) |
28 | ||
29 | 5x |
df[, primary] <- as.factor(df[[primary]]) |
30 | 5x |
df[, secondary] <- as.factor(df[[secondary]]) |
31 | ||
32 |
# check unique relationship |
|
33 | 5x |
df_key <- df[, c(primary, secondary)] |
34 | 5x |
df_key <- unique(df_key) |
35 | ||
36 | 5x |
if (any(duplicated(df_key[[primary]])) || any(duplicated(df_key[[secondary]]))) { |
37 | 1x |
stop("non univoque relation between values in primary and secondary column") |
38 |
} |
|
39 | ||
40 | 4x |
keys <- setNames(as.character(df_key[[secondary]]), as.character(df_key[[primary]])) |
41 | ||
42 | 4x |
all_levels_primary <- c(levels_primary, setdiff(levels(df[[primary]]), levels_primary)) |
43 | 4x |
all_levels_secondary <- keys[all_levels_primary] |
44 | ||
45 | 4x |
df[, primary] <- factor(df[[primary]], all_levels_primary) |
46 | 4x |
df[, secondary] <- factor(df[[secondary]], all_levels_secondary) |
47 | ||
48 | 4x |
df |
49 |
} |
1 |
#' Transforming Empty Strings and White Spaces to NAs |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' SAS imports missing data as empty strings or white spaces. This helper function replaces the empty strings and white |
|
6 |
#' space-only character and levels by `NAs`. |
|
7 |
#' |
|
8 |
#' @param x (`vector`) where empty of white space should be transformed to `NAs`. |
|
9 |
#' |
|
10 |
#' @return `character` or `factor` without explicit NA. `logical` and `numeric` are returned as `character`. |
|
11 |
#' |
|
12 |
#' @export |
|
13 |
#' @examples |
|
14 |
#' char1 <- c(" ", " ", "a", "b", "", "") |
|
15 |
#' h_ws_to_na(char1) |
|
16 |
#' |
|
17 |
#' fact1 <- as.factor(char1) |
|
18 |
#' h_ws_to_na(fact1) |
|
19 |
#' |
|
20 |
#' num1 <- c(1:10) |
|
21 |
#' h_ws_to_na(num1) |
|
22 |
#' |
|
23 |
#' logi1 <- c(TRUE, FALSE, NA) |
|
24 |
#' h_ws_to_na(logi1) |
|
25 |
h_ws_to_na <- function(x) { |
|
26 | 9x |
if (is.factor(x)) { |
27 | 2x |
levels_x <- levels(x) |
28 | ||
29 | 2x |
ws_levels <- grepl("^\\s*$", levels_x) | levels_x == "" |
30 | ||
31 | 2x |
levels(x)[ws_levels] <- NA |
32 | 7x |
} else if (is.character(x)) { |
33 | 6x |
ws_char <- grepl("^\\s*$", x) | x == "" |
34 | ||
35 | 6x |
x[ws_char] <- NA |
36 |
} else { |
|
37 | 1x |
x <- as.character(x) |
38 |
} |
|
39 | 9x |
x |
40 |
} |
|
41 | ||
42 |
#' Transforming Empty Strings and White Spaces to Explicit NAs |
|
43 |
#' |
|
44 |
#' @description `r lifecycle::badge("experimental")` |
|
45 |
#' |
|
46 |
#' SAS imports missing data as empty strings or white spaces. This helper function is a thin wrapper around |
|
47 |
#' [dunlin::h_ws_to_na] which replaces them with explicit missing level. |
|
48 |
#' |
|
49 |
#' @param x (`vector`) where empty of white space should be transformed to `NAs`. |
|
50 |
#' @param na_level (`character`) replacement of the missing levels. |
|
51 |
#' |
|
52 |
#' @return `factor` with explicit NA |
|
53 |
#' |
|
54 |
#' @export |
|
55 |
#' @examples |
|
56 |
#' char1 <- c(" ", " ", "a", "b", "", "") |
|
57 |
#' h_ws_to_explicit_na(char1) |
|
58 |
#' |
|
59 |
#' fact1 <- as.factor(char1) |
|
60 |
#' h_ws_to_explicit_na(fact1) |
|
61 |
#' |
|
62 |
#' num1 <- c(1, 2, NA) |
|
63 |
#' h_ws_to_explicit_na(num1) |
|
64 |
#' |
|
65 |
#' logi1 <- c(TRUE, FALSE, NA) |
|
66 |
#' h_ws_to_explicit_na(logi1) |
|
67 |
h_ws_to_explicit_na <- function(x, na_level = "<Missing>") { |
|
68 | 7x |
checkmate::assert_character(na_level) |
69 | ||
70 | 7x |
res <- forcats::fct_na_value_to_level(h_ws_to_na(x), na_level) |
71 | ||
72 | 7x |
if (na_level %in% res) { |
73 | 5x |
forcats::fct_relevel(res, na_level, after = Inf) |
74 |
} else { |
|
75 | 2x |
forcats::fct_drop(res, only = na_level) |
76 |
} |
|
77 |
} |
|
78 | ||
79 |
#' Transforming Empty Strings and White Spaces to Explicit NAs while Preserving Label |
|
80 |
#' |
|
81 |
#' @details This function preserves the label attribute. |
|
82 |
#' |
|
83 |
#' @param x (`vector`) input to be turned into factor with explicit missing level. |
|
84 |
#' @param na_level (`character`) the label to encode missing levels. |
|
85 |
#' |
|
86 |
#' @return `factor` with explicit NA and the same label as the input. |
|
87 |
#' |
|
88 |
#' @export |
|
89 |
#' @examples |
|
90 |
#' char1 <- c(" ", " ", "a", "b", "", "", NA) |
|
91 |
#' attr(char1, "label") <- "my_label" |
|
92 |
#' |
|
93 |
#' h_as_factor(char1) |
|
94 |
h_as_factor <- function(x, na_level = "<Missing>") { |
|
95 | 4x |
checkmate::assert_vector(x) |
96 | ||
97 | 4x |
init_lab <- attr(x, "label") |
98 | ||
99 | 4x |
res <- h_ws_to_explicit_na(x, na_level = na_level) |
100 | ||
101 | 4x |
attr(res, "label") <- init_lab |
102 | ||
103 | 4x |
res |
104 |
} |
|
105 | ||
106 |
#' Setting the Label Attribute |
|
107 |
#' |
|
108 |
#' @param var (`object`) whose label attribute can be set. |
|
109 |
#' @param label (`character`) the label to add. |
|
110 |
#' |
|
111 |
#' @return `object` with label attribute. |
|
112 |
#' |
|
113 |
#' @export |
|
114 |
#' @examples |
|
115 |
#' x <- c(1:10) |
|
116 |
#' attr(x, "label") |
|
117 |
#' |
|
118 |
#' y <- attr_label(x, "my_label") |
|
119 |
#' attr(y, "label") |
|
120 |
attr_label <- function(var, label) { |
|
121 | 89x |
checkmate::assert_character(label) |
122 | ||
123 | 88x |
x <- var |
124 | 88x |
attr(x, "label") <- label |
125 | ||
126 | 88x |
x |
127 |
} |
|
128 | ||
129 |
#' Setting the Label Attribute to Data Frame Columns |
|
130 |
#' |
|
131 |
#' @param df (`data.frame`). |
|
132 |
#' @param label (`character`) the labels to add. |
|
133 |
#' |
|
134 |
#' @return `data.frame` with label attributes. |
|
135 |
#' |
|
136 |
#' @export |
|
137 |
#' @examples |
|
138 |
#' res <- attr_label_df(mtcars, letters[1:11]) |
|
139 |
#' res |
|
140 |
#' lapply(res, attr, "label") |
|
141 |
attr_label_df <- function(df, label) { |
|
142 | 20x |
checkmate::assert_data_frame(df) |
143 | 20x |
checkmate::assert_character(label, len = ncol(df)) |
144 | ||
145 | 19x |
res <- mapply(attr_label, var = df, label = as.list(label), SIMPLIFY = FALSE) |
146 | 19x |
as.data.frame(res) |
147 |
} |
1 |
#' Encode Categorical Missing Values in a `list` of `data.frame` |
|
2 |
#' |
|
3 |
#' @details This is a helper function to encode missing values (i.e `NA` and `empty string`) of every `character` and |
|
4 |
#' `factor` variable found in a `list` of `data.frame`. The `label` attribute of the columns is preserved. |
|
5 |
#' |
|
6 |
#' @param data (`list` of `data.frame`) to be transformed. |
|
7 |
#' @param omit_tables (`character`) the names of the tables to omit from processing. |
|
8 |
#' @param omit_columns (`character`) the names of the columns to omit from processing. |
|
9 |
#' @param char_as_factor (`logical`) should character columns be converted into factor. |
|
10 |
#' @param na_level (`string`) the label to encode missing levels. |
|
11 |
#' |
|
12 |
#' @return `list` of `data.frame` object with explicit missing levels. |
|
13 |
#' @export |
|
14 |
#' |
|
15 |
#' @examples |
|
16 |
#' |
|
17 |
#' df1 <- data.frame( |
|
18 |
#' "char" = c("a", "b", NA, "a", "k", "x"), |
|
19 |
#' "char2" = c("A", "B", NA, "A", "K", "X"), |
|
20 |
#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), |
|
21 |
#' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA) |
|
22 |
#' ) |
|
23 |
#' df2 <- data.frame( |
|
24 |
#' "char" = c("a", "b", NA, "a", "k", "x"), |
|
25 |
#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), |
|
26 |
#' "num" = c(1:5, NA) |
|
27 |
#' ) |
|
28 |
#' df3 <- data.frame( |
|
29 |
#' "char" = c(NA, NA, "A") |
|
30 |
#' ) |
|
31 |
#' |
|
32 |
#' db <- list(df1 = df1, df2 = df2, df3 = df3) |
|
33 |
#' |
|
34 |
#' ls_explicit_na(db) |
|
35 |
#' ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2") |
|
36 |
#' |
|
37 |
ls_explicit_na <- function(data, |
|
38 |
omit_tables = NULL, |
|
39 |
omit_columns = NULL, |
|
40 |
char_as_factor = TRUE, |
|
41 |
na_level = "<Missing>") { |
|
42 | 3x |
checkmate::assert_list(data, types = "data.frame", names = "unique") |
43 | 3x |
checkmate::assert_character(omit_tables, null.ok = TRUE) |
44 | 3x |
checkmate::assert_character(omit_columns, null.ok = TRUE) |
45 | 3x |
checkmate::assert_flag(char_as_factor) |
46 | 3x |
checkmate::assert_string(na_level) |
47 | ||
48 | 3x |
modif_tab <- setdiff(names(data), omit_tables) |
49 | 3x |
if (length(modif_tab) < 1) { |
50 | 1x |
return(data) |
51 |
} |
|
52 | ||
53 | 2x |
data[modif_tab] <- lapply( |
54 | 2x |
data[modif_tab], |
55 | 2x |
h_df_explicit, |
56 | 2x |
omit_columns = omit_columns, |
57 | 2x |
char_as_factor = char_as_factor, |
58 | 2x |
na_level = na_level |
59 |
) |
|
60 | ||
61 | 2x |
data |
62 |
} |
|
63 | ||
64 |
#' Encode Categorical Missing Values in a `data.frame`. |
|
65 |
#' |
|
66 |
#' @inheritParams ls_explicit_na |
|
67 |
#' @keywords internal |
|
68 |
#' |
|
69 |
#' @examples |
|
70 |
#' \dontrun{ |
|
71 |
#' df <- data.frame( |
|
72 |
#' "char" = c("a", "b", NA, "a", "k", "x"), |
|
73 |
#' "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), |
|
74 |
#' "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA), |
|
75 |
#' "num" = c(1:5, NA) |
|
76 |
#' ) |
|
77 |
#' |
|
78 |
#' h_df_explicit(df) |
|
79 |
#' h_df_explicit(df, omit_columns = c("fact", "x")) |
|
80 |
#' } |
|
81 |
h_df_explicit <- function(df, |
|
82 |
omit_columns = NULL, |
|
83 |
char_as_factor = TRUE, |
|
84 |
na_level = "<Missing>") { |
|
85 | 3x |
na_list <- list(x = c("", NA)) |
86 | 3x |
names(na_list) <- na_level |
87 | 3x |
na_rule <- rule(.lst = na_list) |
88 | ||
89 | 3x |
df %>% |
90 | 3x |
mutate( |
91 | 3x |
across( |
92 | 3x |
where(~ is.character(.x) | is.factor(.x)) & !any_of(.env$omit_columns), |
93 | 3x |
~ reformat(.x, format = .env$na_rule, .string_as_fct = .env$char_as_factor, .na_last = TRUE) |
94 |
) |
|
95 |
) |
|
96 |
} |
1 |
#' Assert Nested List can be used as Format Argument in Reformat. |
|
2 |
#' |
|
3 |
#' @param object (`list`) to assert. |
|
4 |
#' @return invisible `TRUE` or an error message if the criteria are not fulfilled. |
|
5 |
#' |
|
6 |
#' @export |
|
7 |
#' @examples |
|
8 |
#' format <- list( |
|
9 |
#' df1 = list( |
|
10 |
#' var1 = rule("X" = "x", "N" = c(NA, "")) |
|
11 |
#' ), |
|
12 |
#' df2 = list( |
|
13 |
#' var1 = rule(), |
|
14 |
#' var2 = rule("f11" = "F11", "NN" = NA), |
|
15 |
#' var3 = empty_rule |
|
16 |
#' ), |
|
17 |
#' df3 = list() |
|
18 |
#' ) |
|
19 |
#' |
|
20 |
#' assert_valid_format(format) |
|
21 |
assert_valid_format <- function(object) { |
|
22 | 4x |
coll <- checkmate::makeAssertCollection() |
23 | ||
24 |
# Check object. |
|
25 | 4x |
checkmate::assert_list(object, names = "unique", type = "list", add = coll) |
26 | ||
27 |
# Check table level. |
|
28 | 4x |
mapply( |
29 | 4x |
function(x, xtable) { |
30 | 8x |
checkmate::assert_list( |
31 | 8x |
x, |
32 | 8x |
names = "unique", |
33 | 8x |
types = "rule", |
34 | 8x |
any.missing = FALSE, |
35 | 8x |
.var.name = paste0("[", xtable, "]"), |
36 | 8x |
add = coll |
37 |
) |
|
38 |
}, |
|
39 | 4x |
object, |
40 | 4x |
names(object) |
41 |
) |
|
42 | ||
43 | 4x |
checkmate::reportAssertions(coll) |
44 |
} |
|
45 | ||
46 |
#' Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat. |
|
47 |
#' |
|
48 |
#' @param object (`list`) to assert. |
|
49 |
#' @return invisible `TRUE` or an error message if the criteria are not fulfilled. |
|
50 |
#' |
|
51 |
#' @export |
|
52 |
#' @examples |
|
53 |
#' format <- list( |
|
54 |
#' df1 = list( |
|
55 |
#' var1 = list("X" = "x", "N" = c(NA, "")) |
|
56 |
#' ), |
|
57 |
#' df2 = list( |
|
58 |
#' var1 = list(), |
|
59 |
#' var2 = list("f11" = "F11", "NN" = NA) |
|
60 |
#' ), |
|
61 |
#' df3 = list() |
|
62 |
#' ) |
|
63 |
#' |
|
64 |
#' assert_valid_list_format(format) |
|
65 |
assert_valid_list_format <- function(object) { |
|
66 | 2x |
coll <- checkmate::makeAssertCollection() |
67 | ||
68 |
# Check object. |
|
69 | 2x |
checkmate::assert_list(object, names = "unique", type = "list", add = coll) |
70 | ||
71 |
# Check table level. |
|
72 | 2x |
mapply( |
73 | 2x |
function(x, xtable) { |
74 | 4x |
checkmate::assert_list( |
75 | 4x |
x, |
76 | 4x |
names = "unique", |
77 | 4x |
types = "list", |
78 | 4x |
any.missing = FALSE, |
79 | 4x |
.var.name = paste0("[", xtable, "]"), |
80 | 4x |
add = coll |
81 |
) |
|
82 |
}, |
|
83 | 2x |
object, |
84 | 2x |
names(object) |
85 |
) |
|
86 | ||
87 |
# Check variable level. |
|
88 | 2x |
mapply( |
89 | 2x |
function(x, xtable) { |
90 | 4x |
xvar <- names(x) |
91 | 4x |
mapply( |
92 | 4x |
function(x, xvar) { |
93 | 8x |
checkmate::assert_list( |
94 | 8x |
x, |
95 | 8x |
names = "unique", |
96 | 8x |
type = c("character", "numeric", "logical"), |
97 | 8x |
.var.name = paste0("[", xtable, ".", xvar, "]"), |
98 | 8x |
add = coll |
99 |
) |
|
100 |
}, |
|
101 | 4x |
x, |
102 | 4x |
xvar |
103 |
) |
|
104 |
}, |
|
105 | 2x |
object, |
106 | 2x |
names(object) |
107 |
) |
|
108 | ||
109 | 2x |
checkmate::reportAssertions(coll) |
110 |
} |
1 |
whisker_env <- NULL |
|
2 | ||
3 |
.onLoad <- function(libname, pkgname) { |
|
4 | ! |
default_whiskers <- c(patient_label = "patients") |
5 | ! |
whisker_env <<- new.env(parent = globalenv()) |
6 | ! |
add_whisker(default_whiskers) |
7 |
} |