1 |
#' `teal` module: Distribution analysis |
|
2 |
#' |
|
3 |
#' Module is designed to explore the distribution of a single variable within a given dataset. |
|
4 |
#' It offers several tools, such as histograms, Q-Q plots, and various statistical tests to |
|
5 |
#' visually and statistically analyze the variable's distribution. |
|
6 |
#' |
|
7 |
#' @inheritParams teal::module |
|
8 |
#' @inheritParams teal.widgets::standard_layout |
|
9 |
#' @inheritParams shared_params |
|
10 |
#' |
|
11 |
#' @param dist_var (`data_extract_spec` or `list` of multiple `data_extract_spec`) |
|
12 |
#' Variable(s) for which the distribution will be analyzed. |
|
13 |
#' @param strata_var (`data_extract_spec` or `list` of multiple `data_extract_spec`) |
|
14 |
#' Categorical variable used to split the distribution analysis. |
|
15 |
#' @param group_var (`data_extract_spec` or `list` of multiple `data_extract_spec`) |
|
16 |
#' Variable used for faceting plot into multiple panels. |
|
17 |
#' @param freq (`logical`) optional, whether to display frequency (`TRUE`) or density (`FALSE`). |
|
18 |
#' Defaults to density (`FALSE`). |
|
19 |
#' @param bins (`integer(1)` or `integer(3)`) optional, specifies the number of bins for the histogram. |
|
20 |
#' - When the length of `bins` is one: The histogram bins will have a fixed size based on the `bins` provided. |
|
21 |
#' - When the length of `bins` is three: The histogram bins are dynamically adjusted based on vector of `value`, `min`, |
|
22 |
#' and `max`. |
|
23 |
#' Defaults to `c(30L, 1L, 100L)`. |
|
24 |
#' |
|
25 |
#' @templateVar ggnames "Histogram", "QQplot" |
|
26 |
#' @template ggplot2_args_multi |
|
27 |
#' |
|
28 |
#' @inherit shared_params return |
|
29 |
#' |
|
30 |
#' @examplesShinylive |
|
31 |
#' library(teal.modules.general) |
|
32 |
#' interactive <- function() TRUE |
|
33 |
#' {{ next_example }} |
|
34 |
# nolint start: line_length_linter. |
|
35 |
#' @examplesIf require("ggpmisc", quietly = TRUE) && require("ggpp", quietly = TRUE) && require("goftest", quietly = TRUE) && require("MASS", quietly = TRUE) && require("broom", quietly = TRUE) |
|
36 |
# nolint end: line_length_linter. |
|
37 |
#' # general data example |
|
38 |
#' data <- teal_data() |
|
39 |
#' data <- within(data, { |
|
40 |
#' iris <- iris |
|
41 |
#' }) |
|
42 |
#' datanames(data) <- "iris" |
|
43 |
#' |
|
44 |
#' app <- init( |
|
45 |
#' data = data, |
|
46 |
#' modules = list( |
|
47 |
#' tm_g_distribution( |
|
48 |
#' dist_var = data_extract_spec( |
|
49 |
#' dataname = "iris", |
|
50 |
#' select = select_spec(variable_choices("iris"), "Petal.Length") |
|
51 |
#' ) |
|
52 |
#' ) |
|
53 |
#' ) |
|
54 |
#' ) |
|
55 |
#' if (interactive()) { |
|
56 |
#' shinyApp(app$ui, app$server) |
|
57 |
#' } |
|
58 |
#' |
|
59 |
#' @examplesShinylive |
|
60 |
#' library(teal.modules.general) |
|
61 |
#' interactive <- function() TRUE |
|
62 |
#' {{ next_example }} |
|
63 |
# nolint start: line_length_linter. |
|
64 |
#' @examplesIf require("ggpmisc", quietly = TRUE) && require("ggpp", quietly = TRUE) && require("goftest", quietly = TRUE) && require("MASS", quietly = TRUE) && require("broom", quietly = TRUE) |
|
65 |
# nolint end: line_length_linter. |
|
66 |
#' # CDISC data example |
|
67 |
#' data <- teal_data() |
|
68 |
#' data <- within(data, { |
|
69 |
#' ADSL <- rADSL |
|
70 |
#' }) |
|
71 |
#' datanames(data) <- c("ADSL") |
|
72 |
#' join_keys(data) <- default_cdisc_join_keys[datanames(data)] |
|
73 |
#' |
|
74 |
#' vars1 <- choices_selected( |
|
75 |
#' variable_choices(data[["ADSL"]], c("ARM", "COUNTRY", "SEX")), |
|
76 |
#' selected = NULL |
|
77 |
#' ) |
|
78 |
#' |
|
79 |
#' app <- init( |
|
80 |
#' data = data, |
|
81 |
#' modules = modules( |
|
82 |
#' tm_g_distribution( |
|
83 |
#' dist_var = data_extract_spec( |
|
84 |
#' dataname = "ADSL", |
|
85 |
#' select = select_spec( |
|
86 |
#' choices = variable_choices(data[["ADSL"]], c("AGE", "BMRKR1")), |
|
87 |
#' selected = "BMRKR1", |
|
88 |
#' multiple = FALSE, |
|
89 |
#' fixed = FALSE |
|
90 |
#' ) |
|
91 |
#' ), |
|
92 |
#' strata_var = data_extract_spec( |
|
93 |
#' dataname = "ADSL", |
|
94 |
#' filter = filter_spec( |
|
95 |
#' vars = vars1, |
|
96 |
#' multiple = TRUE |
|
97 |
#' ) |
|
98 |
#' ), |
|
99 |
#' group_var = data_extract_spec( |
|
100 |
#' dataname = "ADSL", |
|
101 |
#' filter = filter_spec( |
|
102 |
#' vars = vars1, |
|
103 |
#' multiple = TRUE |
|
104 |
#' ) |
|
105 |
#' ) |
|
106 |
#' ) |
|
107 |
#' ) |
|
108 |
#' ) |
|
109 |
#' if (interactive()) { |
|
110 |
#' shinyApp(app$ui, app$server) |
|
111 |
#' } |
|
112 |
#' |
|
113 |
#' @export |
|
114 |
#' |
|
115 |
tm_g_distribution <- function(label = "Distribution Module", |
|
116 |
dist_var, |
|
117 |
strata_var = NULL, |
|
118 |
group_var = NULL, |
|
119 |
freq = FALSE, |
|
120 |
ggtheme = c("gray", "bw", "linedraw", "light", "dark", "minimal", "classic", "void"), |
|
121 |
ggplot2_args = teal.widgets::ggplot2_args(), |
|
122 |
bins = c(30L, 1L, 100L), |
|
123 |
plot_height = c(600, 200, 2000), |
|
124 |
plot_width = NULL, |
|
125 |
pre_output = NULL, |
|
126 |
post_output = NULL) { |
|
127 | ! |
message("Initializing tm_g_distribution") |
128 | ||
129 |
# Requires Suggested packages |
|
130 | ! |
extra_packages <- c("ggpmisc", "ggpp", "goftest", "MASS", "broom") |
131 | ! |
missing_packages <- Filter(function(x) !requireNamespace(x, quietly = TRUE), extra_packages) |
132 | ! |
if (length(missing_packages) > 0L) { |
133 | ! |
stop(sprintf( |
134 | ! |
"Cannot load package(s): %s.\nInstall or restart your session.", |
135 | ! |
toString(missing_packages) |
136 |
)) |
|
137 |
} |
|
138 | ||
139 |
# Normalize the parameters |
|
140 | ! |
if (inherits(dist_var, "data_extract_spec")) dist_var <- list(dist_var) |
141 | ! |
if (inherits(strata_var, "data_extract_spec")) strata_var <- list(strata_var) |
142 | ! |
if (inherits(group_var, "data_extract_spec")) group_var <- list(group_var) |
143 | ! |
if (inherits(ggplot2_args, "ggplot2_args")) ggplot2_args <- list(default = ggplot2_args) |
144 | ||
145 |
# Start of assertions |
|
146 | ! |
checkmate::assert_string(label) |
147 | ||
148 | ! |
checkmate::assert_list(dist_var, "data_extract_spec") |
149 | ! |
checkmate::assert_false(dist_var[[1L]]$select$multiple) |
150 | ||
151 | ! |
checkmate::assert_list(strata_var, types = "data_extract_spec", null.ok = TRUE) |
152 | ! |
checkmate::assert_list(group_var, types = "data_extract_spec", null.ok = TRUE) |
153 | ! |
checkmate::assert_flag(freq) |
154 | ! |
ggtheme <- match.arg(ggtheme) |
155 | ||
156 | ! |
plot_choices <- c("Histogram", "QQplot") |
157 | ! |
checkmate::assert_list(ggplot2_args, types = "ggplot2_args") |
158 | ! |
checkmate::assert_subset(names(ggplot2_args), c("default", plot_choices)) |
159 | ||
160 | ! |
if (length(bins) == 1) { |
161 | ! |
checkmate::assert_numeric(bins, any.missing = FALSE, lower = 1) |
162 |
} else { |
|
163 | ! |
checkmate::assert_numeric(bins, len = 3, any.missing = FALSE, lower = 1) |
164 | ! |
checkmate::assert_numeric(bins[1], lower = bins[2], upper = bins[3], .var.name = "bins") |
165 |
} |
|
166 | ||
167 | ! |
checkmate::assert_numeric(plot_height, len = 3, any.missing = FALSE, finite = TRUE) |
168 | ! |
checkmate::assert_numeric(plot_height[1], lower = plot_height[2], upper = plot_height[3], .var.name = "plot_height") |
169 | ! |
checkmate::assert_numeric(plot_width, len = 3, any.missing = FALSE, null.ok = TRUE, finite = TRUE) |
170 | ! |
checkmate::assert_numeric( |
171 | ! |
plot_width[1], |
172 | ! |
lower = plot_width[2], upper = plot_width[3], null.ok = TRUE, .var.name = "plot_width" |
173 |
) |
|
174 | ||
175 | ! |
checkmate::assert_multi_class(pre_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE) |
176 | ! |
checkmate::assert_multi_class(post_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE) |
177 |
# End of assertions |
|
178 | ||
179 |
# Make UI args |
|
180 | ! |
args <- as.list(environment()) |
181 | ||
182 | ! |
data_extract_list <- list( |
183 | ! |
dist_var = dist_var, |
184 | ! |
strata_var = strata_var, |
185 | ! |
group_var = group_var |
186 |
) |
|
187 | ||
188 | ! |
ans <- module( |
189 | ! |
label = label, |
190 | ! |
server = srv_distribution, |
191 | ! |
server_args = c( |
192 | ! |
data_extract_list, |
193 | ! |
list(plot_height = plot_height, plot_width = plot_width, ggplot2_args = ggplot2_args) |
194 |
), |
|
195 | ! |
ui = ui_distribution, |
196 | ! |
ui_args = args, |
197 | ! |
datanames = teal.transform::get_extract_datanames(data_extract_list) |
198 |
) |
|
199 | ! |
attr(ans, "teal_bookmarkable") <- TRUE |
200 | ! |
ans |
201 |
} |
|
202 | ||
203 |
# UI function for the distribution module |
|
204 |
ui_distribution <- function(id, ...) { |
|
205 | ! |
args <- list(...) |
206 | ! |
ns <- NS(id) |
207 | ! |
is_single_dataset_value <- teal.transform::is_single_dataset(args$dist_var, args$strata_var, args$group_var) |
208 | ||
209 | ! |
teal.widgets::standard_layout( |
210 | ! |
output = teal.widgets::white_small_well( |
211 | ! |
tabsetPanel( |
212 | ! |
id = ns("tabs"), |
213 | ! |
tabPanel("Histogram", teal.widgets::plot_with_settings_ui(id = ns("hist_plot"))), |
214 | ! |
tabPanel("QQplot", teal.widgets::plot_with_settings_ui(id = ns("qq_plot"))) |
215 |
), |
|
216 | ! |
tags$h3("Statistics Table"), |
217 | ! |
DT::dataTableOutput(ns("summary_table")), |
218 | ! |
tags$h3("Tests"), |
219 | ! |
DT::dataTableOutput(ns("t_stats")) |
220 |
), |
|
221 | ! |
encoding = tags$div( |
222 |
### Reporter |
|
223 | ! |
teal.reporter::simple_reporter_ui(ns("simple_reporter")), |
224 |
### |
|
225 | ! |
tags$label("Encodings", class = "text-primary"), |
226 | ! |
teal.transform::datanames_input(args[c("dist_var", "strata_var")]), |
227 | ! |
teal.transform::data_extract_ui( |
228 | ! |
id = ns("dist_i"), |
229 | ! |
label = "Variable", |
230 | ! |
data_extract_spec = args$dist_var, |
231 | ! |
is_single_dataset = is_single_dataset_value |
232 |
), |
|
233 | ! |
if (!is.null(args$group_var)) { |
234 | ! |
tagList( |
235 | ! |
teal.transform::data_extract_ui( |
236 | ! |
id = ns("group_i"), |
237 | ! |
label = "Group by", |
238 | ! |
data_extract_spec = args$group_var, |
239 | ! |
is_single_dataset = is_single_dataset_value |
240 |
), |
|
241 | ! |
uiOutput(ns("scales_types_ui")) |
242 |
) |
|
243 |
}, |
|
244 | ! |
if (!is.null(args$strata_var)) { |
245 | ! |
teal.transform::data_extract_ui( |
246 | ! |
id = ns("strata_i"), |
247 | ! |
label = "Stratify by", |
248 | ! |
data_extract_spec = args$strata_var, |
249 | ! |
is_single_dataset = is_single_dataset_value |
250 |
) |
|
251 |
}, |
|
252 | ! |
teal.widgets::panel_group( |
253 | ! |
conditionalPanel( |
254 | ! |
condition = paste0("input['", ns("tabs"), "'] == 'Histogram'"), |
255 | ! |
teal.widgets::panel_item( |
256 | ! |
"Histogram", |
257 | ! |
teal.widgets::optionalSliderInputValMinMax(ns("bins"), "Bins", args$bins, ticks = FALSE, step = 1), |
258 | ! |
shinyWidgets::prettyRadioButtons( |
259 | ! |
ns("main_type"), |
260 | ! |
label = "Plot Type:", |
261 | ! |
choices = c("Density", "Frequency"), |
262 | ! |
selected = if (!args$freq) "Density" else "Frequency", |
263 | ! |
bigger = FALSE, |
264 | ! |
inline = TRUE |
265 |
), |
|
266 | ! |
checkboxInput(ns("add_dens"), label = "Overlay Density", value = TRUE), |
267 | ! |
collapsed = FALSE |
268 |
) |
|
269 |
), |
|
270 | ! |
conditionalPanel( |
271 | ! |
condition = paste0("input['", ns("tabs"), "'] == 'QQplot'"), |
272 | ! |
teal.widgets::panel_item( |
273 | ! |
"QQ Plot", |
274 | ! |
checkboxInput(ns("qq_line"), label = "Add diagonal line(s)", TRUE), |
275 | ! |
collapsed = FALSE |
276 |
) |
|
277 |
), |
|
278 | ! |
conditionalPanel( |
279 | ! |
condition = paste0("input['", ns("main_type"), "'] == 'Density'"), |
280 | ! |
teal.widgets::panel_item( |
281 | ! |
"Theoretical Distribution", |
282 | ! |
teal.widgets::optionalSelectInput( |
283 | ! |
ns("t_dist"), |
284 | ! |
tags$div( |
285 | ! |
class = "teal-tooltip", |
286 | ! |
tagList( |
287 | ! |
"Distribution:", |
288 | ! |
icon("circle-info"), |
289 | ! |
tags$span( |
290 | ! |
class = "tooltiptext", |
291 | ! |
"Default parameters are optimized with MASS::fitdistr function." |
292 |
) |
|
293 |
) |
|
294 |
), |
|
295 | ! |
choices = c("normal", "lognormal", "gamma", "unif"), |
296 | ! |
selected = NULL, |
297 | ! |
multiple = FALSE |
298 |
), |
|
299 | ! |
numericInput(ns("dist_param1"), label = "param1", value = NULL), |
300 | ! |
numericInput(ns("dist_param2"), label = "param2", value = NULL), |
301 | ! |
tags$span(actionButton(ns("params_reset"), "Default params")), |
302 | ! |
collapsed = FALSE |
303 |
) |
|
304 |
) |
|
305 |
), |
|
306 | ! |
teal.widgets::panel_item( |
307 | ! |
"Tests", |
308 | ! |
teal.widgets::optionalSelectInput( |
309 | ! |
ns("dist_tests"), |
310 | ! |
"Tests:", |
311 | ! |
choices = c( |
312 | ! |
"Shapiro-Wilk", |
313 | ! |
if (!is.null(args$strata_var)) "t-test (two-samples, not paired)", |
314 | ! |
if (!is.null(args$strata_var)) "one-way ANOVA", |
315 | ! |
if (!is.null(args$strata_var)) "Fligner-Killeen", |
316 | ! |
if (!is.null(args$strata_var)) "F-test", |
317 | ! |
"Kolmogorov-Smirnov (one-sample)", |
318 | ! |
"Anderson-Darling (one-sample)", |
319 | ! |
"Cramer-von Mises (one-sample)", |
320 | ! |
if (!is.null(args$strata_var)) "Kolmogorov-Smirnov (two-samples)" |
321 |
), |
|
322 | ! |
selected = NULL |
323 |
) |
|
324 |
), |
|
325 | ! |
teal.widgets::panel_item( |
326 | ! |
"Statistics Table", |
327 | ! |
sliderInput(ns("roundn"), "Round to n digits", min = 0, max = 10, value = 2) |
328 |
), |
|
329 | ! |
teal.widgets::panel_item( |
330 | ! |
title = "Plot settings", |
331 | ! |
selectInput( |
332 | ! |
inputId = ns("ggtheme"), |
333 | ! |
label = "Theme (by ggplot):", |
334 | ! |
choices = ggplot_themes, |
335 | ! |
selected = args$ggtheme, |
336 | ! |
multiple = FALSE |
337 |
) |
|
338 |
) |
|
339 |
), |
|
340 | ! |
forms = tagList( |
341 | ! |
teal.widgets::verbatim_popup_ui(ns("rcode"), "Show R code") |
342 |
), |
|
343 | ! |
pre_output = args$pre_output, |
344 | ! |
post_output = args$post_output |
345 |
) |
|
346 |
} |
|
347 | ||
348 |
# Server function for the distribution module |
|
349 |
srv_distribution <- function(id, |
|
350 |
data, |
|
351 |
reporter, |
|
352 |
filter_panel_api, |
|
353 |
dist_var, |
|
354 |
strata_var, |
|
355 |
group_var, |
|
356 |
plot_height, |
|
357 |
plot_width, |
|
358 |
ggplot2_args) { |
|
359 | ! |
with_reporter <- !missing(reporter) && inherits(reporter, "Reporter") |
360 | ! |
with_filter <- !missing(filter_panel_api) && inherits(filter_panel_api, "FilterPanelAPI") |
361 | ! |
checkmate::assert_class(data, "reactive") |
362 | ! |
checkmate::assert_class(isolate(data()), "teal_data") |
363 | ! |
moduleServer(id, function(input, output, session) { |
364 | ! |
teal.logger::log_shiny_input_changes(input, namespace = "teal.modules.general") |
365 | ||
366 | ! |
setBookmarkExclude("params_reset") |
367 | ||
368 | ! |
ns <- session$ns |
369 | ||
370 | ! |
rule_req <- function(value) { |
371 | ! |
if (isTRUE(input$dist_tests %in% c( |
372 | ! |
"Fligner-Killeen", |
373 | ! |
"t-test (two-samples, not paired)", |
374 | ! |
"F-test", |
375 | ! |
"Kolmogorov-Smirnov (two-samples)", |
376 | ! |
"one-way ANOVA" |
377 |
))) { |
|
378 | ! |
if (!shinyvalidate::input_provided(value)) { |
379 | ! |
"Please select stratify variable." |
380 |
} |
|
381 |
} |
|
382 |
} |
|
383 | ! |
rule_dupl <- function(...) { |
384 | ! |
if (identical(input$dist_tests, "Fligner-Killeen")) { |
385 | ! |
strata <- selector_list()$strata_i()$select |
386 | ! |
group <- selector_list()$group_i()$select |
387 | ! |
if (isTRUE(strata == group)) { |
388 | ! |
"Please select different variables for strata and group." |
389 |
} |
|
390 |
} |
|
391 |
} |
|
392 | ||
393 | ! |
selector_list <- teal.transform::data_extract_multiple_srv( |
394 | ! |
data_extract = list( |
395 | ! |
dist_i = dist_var, |
396 | ! |
strata_i = strata_var, |
397 | ! |
group_i = group_var |
398 |
), |
|
399 | ! |
data, |
400 | ! |
select_validation_rule = list( |
401 | ! |
dist_i = shinyvalidate::sv_required("Please select a variable") |
402 |
), |
|
403 | ! |
filter_validation_rule = list( |
404 | ! |
strata_i = shinyvalidate::compose_rules( |
405 | ! |
rule_req, |
406 | ! |
rule_dupl |
407 |
), |
|
408 | ! |
group_i = rule_dupl |
409 |
) |
|
410 |
) |
|
411 | ||
412 | ! |
iv_r <- reactive({ |
413 | ! |
iv <- shinyvalidate::InputValidator$new() |
414 | ! |
teal.transform::compose_and_enable_validators(iv, selector_list, validator_names = "dist_i") |
415 |
}) |
|
416 | ||
417 | ! |
iv_r_dist <- reactive({ |
418 | ! |
iv <- shinyvalidate::InputValidator$new() |
419 | ! |
teal.transform::compose_and_enable_validators( |
420 | ! |
iv, selector_list, |
421 | ! |
validator_names = c("strata_i", "group_i") |
422 |
) |
|
423 |
}) |
|
424 | ! |
rule_dist_1 <- function(value) { |
425 | ! |
if (!is.null(input$t_dist)) { |
426 | ! |
switch(input$t_dist, |
427 | ! |
"normal" = if (!shinyvalidate::input_provided(value)) "mean is required", |
428 | ! |
"lognormal" = if (!shinyvalidate::input_provided(value)) "meanlog is required", |
429 | ! |
"gamma" = { |
430 | ! |
if (!shinyvalidate::input_provided(value)) "shape is required" else if (value <= 0) "shape must be positive" |
431 |
}, |
|
432 | ! |
"unif" = NULL |
433 |
) |
|
434 |
} |
|
435 |
} |
|
436 | ! |
rule_dist_2 <- function(value) { |
437 | ! |
if (!is.null(input$t_dist)) { |
438 | ! |
switch(input$t_dist, |
439 | ! |
"normal" = { |
440 | ! |
if (!shinyvalidate::input_provided(value)) { |
441 | ! |
"sd is required" |
442 | ! |
} else if (value < 0) { |
443 | ! |
"sd must be non-negative" |
444 |
} |
|
445 |
}, |
|
446 | ! |
"lognormal" = { |
447 | ! |
if (!shinyvalidate::input_provided(value)) { |
448 | ! |
"sdlog is required" |
449 | ! |
} else if (value < 0) { |
450 | ! |
"sdlog must be non-negative" |
451 |
} |
|
452 |
}, |
|
453 | ! |
"gamma" = { |
454 | ! |
if (!shinyvalidate::input_provided(value)) { |
455 | ! |
"rate is required" |
456 | ! |
} else if (value <= 0) { |
457 | ! |
"rate must be positive" |
458 |
} |
|
459 |
}, |
|
460 | ! |
"unif" = NULL |
461 |
) |
|
462 |
} |
|
463 |
} |
|
464 | ! |
rule_dist <- function(value) { |
465 | ! |
if (isTRUE(input$tabs == "QQplot" || |
466 | ! |
input$dist_tests %in% c( |
467 | ! |
"Kolmogorov-Smirnov (one-sample)", |
468 | ! |
"Anderson-Darling (one-sample)", |
469 | ! |
"Cramer-von Mises (one-sample)" |
470 |
))) { |
|
471 | ! |
if (!shinyvalidate::input_provided(value)) { |
472 | ! |
"Please select the theoretical distribution." |
473 |
} |
|
474 |
} |
|
475 |
} |
|
476 | ! |
iv_dist <- shinyvalidate::InputValidator$new() |
477 | ! |
iv_dist$add_rule("t_dist", rule_dist) |
478 | ! |
iv_dist$add_rule("dist_param1", rule_dist_1) |
479 | ! |
iv_dist$add_rule("dist_param2", rule_dist_2) |
480 | ! |
iv_dist$enable() |
481 | ||
482 | ! |
anl_merged_input <- teal.transform::merge_expression_srv( |
483 | ! |
selector_list = selector_list, |
484 | ! |
datasets = data |
485 |
) |
|
486 | ||
487 | ! |
anl_merged_q <- reactive({ |
488 | ! |
req(anl_merged_input()) |
489 | ! |
data() %>% |
490 | ! |
teal.code::eval_code(as.expression(anl_merged_input()$expr)) |
491 |
}) |
|
492 | ||
493 | ! |
merged <- list( |
494 | ! |
anl_input_r = anl_merged_input, |
495 | ! |
anl_q_r = anl_merged_q |
496 |
) |
|
497 | ||
498 | ! |
output$scales_types_ui <- renderUI({ |
499 | ! |
if ("group_i" %in% names(selector_list()) && length(selector_list()$group_i()$filters[[1]]$selected) > 0) { |
500 | ! |
shinyWidgets::prettyRadioButtons( |
501 | ! |
ns("scales_type"), |
502 | ! |
label = "Scales:", |
503 | ! |
choices = c("Fixed", "Free"), |
504 | ! |
selected = "Fixed", |
505 | ! |
bigger = FALSE, |
506 | ! |
inline = TRUE |
507 |
) |
|
508 |
} |
|
509 |
}) |
|
510 | ||
511 | ! |
observeEvent( |
512 | ! |
eventExpr = list( |
513 | ! |
input$t_dist, |
514 | ! |
input$params_reset, |
515 | ! |
selector_list()$dist_i()$select |
516 |
), |
|
517 | ! |
handlerExpr = { |
518 | ! |
params <- |
519 | ! |
if (length(input$t_dist) != 0) { |
520 | ! |
get_dist_params <- function(x, dist) { |
521 | ! |
if (dist == "unif") { |
522 | ! |
return(stats::setNames(range(x, na.rm = TRUE), c("min", "max"))) |
523 |
} |
|
524 | ! |
tryCatch( |
525 | ! |
MASS::fitdistr(x, densfun = dist)$estimate, |
526 | ! |
error = function(e) c(param1 = NA_real_, param2 = NA_real_) |
527 |
) |
|
528 |
} |
|
529 | ||
530 | ! |
ANL <- merged$anl_q_r()[["ANL"]] |
531 | ! |
round(get_dist_params(as.numeric(stats::na.omit(ANL[[merge_vars()$dist_var]])), input$t_dist), 2) |
532 |
} else { |
|
533 | ! |
c("param1" = NA_real_, "param2" = NA_real_) |
534 |
} |
|
535 | ||
536 | ! |
params_vals <- unname(params) |
537 | ! |
params_names <- names(params) |
538 | ||
539 | ! |
updateNumericInput( |
540 | ! |
inputId = "dist_param1", |
541 | ! |
label = params_names[1], |
542 | ! |
value = restoreInput(ns("dist_param1"), params_vals[1]) |
543 |
) |
|
544 | ! |
updateNumericInput( |
545 | ! |
inputId = "dist_param2", |
546 | ! |
label = params_names[2], |
547 | ! |
value = restoreInput(ns("dist_param1"), params_vals[2]) |
548 |
) |
|
549 |
}, |
|
550 | ! |
ignoreInit = TRUE |
551 |
) |
|
552 | ||
553 | ! |
observeEvent(input$params_reset, { |
554 | ! |
updateActionButton(inputId = "params_reset", label = "Reset params") |
555 |
}) |
|
556 | ||
557 | ! |
merge_vars <- reactive({ |
558 | ! |
teal::validate_inputs(iv_r()) |
559 | ||
560 | ! |
dist_var <- as.vector(merged$anl_input_r()$columns_source$dist_i) |
561 | ! |
s_var <- as.vector(merged$anl_input_r()$columns_source$strata_i) |
562 | ! |
g_var <- as.vector(merged$anl_input_r()$columns_source$group_i) |
563 | ||
564 | ! |
dist_var_name <- if (length(dist_var)) as.name(dist_var) else NULL |
565 | ! |
s_var_name <- if (length(s_var)) as.name(s_var) else NULL |
566 | ! |
g_var_name <- if (length(g_var)) as.name(g_var) else NULL |
567 | ||
568 | ! |
list( |
569 | ! |
dist_var = dist_var, |
570 | ! |
s_var = s_var, |
571 | ! |
g_var = g_var, |
572 | ! |
dist_var_name = dist_var_name, |
573 | ! |
s_var_name = s_var_name, |
574 | ! |
g_var_name = g_var_name |
575 |
) |
|
576 |
}) |
|
577 | ||
578 |
# common qenv |
|
579 | ! |
common_q <- reactive({ |
580 |
# Create a private stack for this function only. |
|
581 | ||
582 | ! |
ANL <- merged$anl_q_r()[["ANL"]] |
583 | ! |
dist_var <- merge_vars()$dist_var |
584 | ! |
s_var <- merge_vars()$s_var |
585 | ! |
g_var <- merge_vars()$g_var |
586 | ||
587 | ! |
dist_var_name <- merge_vars()$dist_var_name |
588 | ! |
s_var_name <- merge_vars()$s_var_name |
589 | ! |
g_var_name <- merge_vars()$g_var_name |
590 | ||
591 | ! |
roundn <- input$roundn |
592 | ! |
dist_param1 <- input$dist_param1 |
593 | ! |
dist_param2 <- input$dist_param2 |
594 |
# isolated as dist_param1/dist_param2 already triggered the reactivity |
|
595 | ! |
t_dist <- isolate(input$t_dist) |
596 | ||
597 | ! |
qenv <- merged$anl_q_r() |
598 | ||
599 | ! |
if (length(g_var) > 0) { |
600 | ! |
validate( |
601 | ! |
need( |
602 | ! |
inherits(ANL[[g_var]], c("integer", "factor", "character")), |
603 | ! |
"Group by variable must be `factor`, `character`, or `integer`" |
604 |
) |
|
605 |
) |
|
606 | ! |
qenv <- teal.code::eval_code( |
607 | ! |
qenv, |
608 | ! |
substitute( |
609 | ! |
expr = ANL[[g_var]] <- forcats::fct_na_value_to_level(as.factor(ANL[[g_var]]), "NA"), |
610 | ! |
env = list(g_var = g_var) |
611 |
) |
|
612 |
) |
|
613 |
} |
|
614 | ||
615 | ! |
if (length(s_var) > 0) { |
616 | ! |
validate( |
617 | ! |
need( |
618 | ! |
inherits(ANL[[s_var]], c("integer", "factor", "character")), |
619 | ! |
"Stratify by variable must be `factor`, `character`, or `integer`" |
620 |
) |
|
621 |
) |
|
622 | ! |
qenv <- teal.code::eval_code( |
623 | ! |
qenv, |
624 | ! |
substitute( |
625 | ! |
expr = ANL[[s_var]] <- forcats::fct_na_value_to_level(as.factor(ANL[[s_var]]), "NA"), |
626 | ! |
env = list(s_var = s_var) |
627 |
) |
|
628 |
) |
|
629 |
} |
|
630 | ||
631 | ! |
validate(need(is.numeric(ANL[[dist_var]]), "Please select a numeric variable.")) |
632 | ! |
teal::validate_has_data(ANL, 1, complete = TRUE) |
633 | ||
634 | ! |
if (length(t_dist) != 0) { |
635 | ! |
map_distr_nams <- list( |
636 | ! |
normal = c("mean", "sd"), |
637 | ! |
lognormal = c("meanlog", "sdlog"), |
638 | ! |
gamma = c("shape", "rate"), |
639 | ! |
unif = c("min", "max") |
640 |
) |
|
641 | ! |
params_names_raw <- map_distr_nams[[t_dist]] |
642 | ||
643 | ! |
qenv <- teal.code::eval_code( |
644 | ! |
qenv, |
645 | ! |
substitute( |
646 | ! |
expr = { |
647 | ! |
params <- as.list(c(dist_param1, dist_param2)) |
648 | ! |
names(params) <- params_names_raw |
649 |
}, |
|
650 | ! |
env = list( |
651 | ! |
dist_param1 = dist_param1, |
652 | ! |
dist_param2 = dist_param2, |
653 | ! |
params_names_raw = params_names_raw |
654 |
) |
|
655 |
) |
|
656 |
) |
|
657 |
} |
|
658 | ||
659 | ! |
if (length(s_var) == 0 && length(g_var) == 0) { |
660 | ! |
qenv <- teal.code::eval_code( |
661 | ! |
qenv, |
662 | ! |
substitute( |
663 | ! |
expr = { |
664 | ! |
summary_table <- ANL %>% |
665 | ! |
dplyr::summarise( |
666 | ! |
min = round(min(dist_var_name, na.rm = TRUE), roundn), |
667 | ! |
median = round(stats::median(dist_var_name, na.rm = TRUE), roundn), |
668 | ! |
mean = round(mean(dist_var_name, na.rm = TRUE), roundn), |
669 | ! |
max = round(max(dist_var_name, na.rm = TRUE), roundn), |
670 | ! |
sd = round(stats::sd(dist_var_name, na.rm = TRUE), roundn), |
671 | ! |
count = dplyr::n() |
672 |
) |
|
673 |
}, |
|
674 | ! |
env = list( |
675 | ! |
dist_var_name = as.name(dist_var), |
676 | ! |
roundn = roundn |
677 |
) |
|
678 |
) |
|
679 |
) |
|
680 |
} else { |
|
681 | ! |
qenv <- teal.code::eval_code( |
682 | ! |
qenv, |
683 | ! |
substitute( |
684 | ! |
expr = { |
685 | ! |
strata_vars <- strata_vars_raw |
686 | ! |
summary_table <- ANL %>% |
687 | ! |
dplyr::group_by_at(dplyr::vars(dplyr::any_of(strata_vars))) %>% |
688 | ! |
dplyr::summarise( |
689 | ! |
min = round(min(dist_var_name, na.rm = TRUE), roundn), |
690 | ! |
median = round(stats::median(dist_var_name, na.rm = TRUE), roundn), |
691 | ! |
mean = round(mean(dist_var_name, na.rm = TRUE), roundn), |
692 | ! |
max = round(max(dist_var_name, na.rm = TRUE), roundn), |
693 | ! |
sd = round(stats::sd(dist_var_name, na.rm = TRUE), roundn), |
694 | ! |
count = dplyr::n() |
695 |
) |
|
696 | ! |
summary_table # used to display table when running show-r-code code |
697 |
}, |
|
698 | ! |
env = list( |
699 | ! |
dist_var_name = dist_var_name, |
700 | ! |
strata_vars_raw = c(g_var, s_var), |
701 | ! |
roundn = roundn |
702 |
) |
|
703 |
) |
|
704 |
) |
|
705 |
} |
|
706 |
}) |
|
707 | ||
708 |
# distplot qenv ---- |
|
709 | ! |
dist_q <- eventReactive( |
710 | ! |
eventExpr = { |
711 | ! |
common_q() |
712 | ! |
input$scales_type |
713 | ! |
input$main_type |
714 | ! |
input$bins |
715 | ! |
input$add_dens |
716 | ! |
is.null(input$ggtheme) |
717 |
}, |
|
718 | ! |
valueExpr = { |
719 | ! |
dist_var <- merge_vars()$dist_var |
720 | ! |
s_var <- merge_vars()$s_var |
721 | ! |
g_var <- merge_vars()$g_var |
722 | ! |
dist_var_name <- merge_vars()$dist_var_name |
723 | ! |
s_var_name <- merge_vars()$s_var_name |
724 | ! |
g_var_name <- merge_vars()$g_var_name |
725 | ! |
t_dist <- input$t_dist |
726 | ! |
dist_param1 <- input$dist_param1 |
727 | ! |
dist_param2 <- input$dist_param2 |
728 | ||
729 | ! |
scales_type <- input$scales_type |
730 | ||
731 | ! |
ndensity <- 512 |
732 | ! |
main_type_var <- input$main_type |
733 | ! |
bins_var <- input$bins |
734 | ! |
add_dens_var <- input$add_dens |
735 | ! |
ggtheme <- input$ggtheme |
736 | ||
737 | ! |
teal::validate_inputs(iv_dist) |
738 | ||
739 | ! |
qenv <- common_q() |
740 | ||
741 | ! |
m_type <- if (main_type_var == "Density") "density" else "count" |
742 | ||
743 | ! |
plot_call <- if (length(s_var) == 0 && length(g_var) == 0) { |
744 | ! |
substitute( |
745 | ! |
expr = ggplot(ANL, aes(dist_var_name)) + |
746 | ! |
geom_histogram( |
747 | ! |
position = "identity", aes(y = after_stat(m_type)), bins = bins_var, alpha = 0.3 |
748 |
), |
|
749 | ! |
env = list( |
750 | ! |
m_type = as.name(m_type), bins_var = bins_var, dist_var_name = as.name(dist_var) |
751 |
) |
|
752 |
) |
|
753 | ! |
} else if (length(s_var) != 0 && length(g_var) == 0) { |
754 | ! |
substitute( |
755 | ! |
expr = ggplot(ANL, aes(dist_var_name, col = s_var_name)) + |
756 | ! |
geom_histogram( |
757 | ! |
position = "identity", aes(y = after_stat(m_type), fill = s_var), bins = bins_var, alpha = 0.3 |
758 |
), |
|
759 | ! |
env = list( |
760 | ! |
m_type = as.name(m_type), |
761 | ! |
bins_var = bins_var, |
762 | ! |
dist_var_name = dist_var_name, |
763 | ! |
s_var = as.name(s_var), |
764 | ! |
s_var_name = s_var_name |
765 |
) |
|
766 |
) |
|
767 | ! |
} else if (length(s_var) == 0 && length(g_var) != 0) { |
768 | ! |
req(scales_type) |
769 | ! |
substitute( |
770 | ! |
expr = ggplot(ANL[ANL[[g_var]] != "NA", ], aes(dist_var_name)) + |
771 | ! |
geom_histogram( |
772 | ! |
position = "identity", aes(y = after_stat(m_type)), bins = bins_var, alpha = 0.3 |
773 |
) + |
|
774 | ! |
facet_wrap(~g_var_name, ncol = 1, scales = scales_raw), |
775 | ! |
env = list( |
776 | ! |
m_type = as.name(m_type), |
777 | ! |
bins_var = bins_var, |
778 | ! |
dist_var_name = dist_var_name, |
779 | ! |
g_var = g_var, |
780 | ! |
g_var_name = g_var_name, |
781 | ! |
scales_raw = tolower(scales_type) |
782 |
) |
|
783 |
) |
|
784 |
} else { |
|
785 | ! |
req(scales_type) |
786 | ! |
substitute( |
787 | ! |
expr = ggplot(ANL[ANL[[g_var]] != "NA", ], aes(dist_var_name, col = s_var_name)) + |
788 | ! |
geom_histogram( |
789 | ! |
position = "identity", |
790 | ! |
aes(y = after_stat(m_type), fill = s_var), bins = bins_var, alpha = 0.3 |
791 |
) + |
|
792 | ! |
facet_wrap(~g_var_name, ncol = 1, scales = scales_raw), |
793 | ! |
env = list( |
794 | ! |
m_type = as.name(m_type), |
795 | ! |
bins_var = bins_var, |
796 | ! |
dist_var_name = dist_var_name, |
797 | ! |
g_var = g_var, |
798 | ! |
s_var = as.name(s_var), |
799 | ! |
g_var_name = g_var_name, |
800 | ! |
s_var_name = s_var_name, |
801 | ! |
scales_raw = tolower(scales_type) |
802 |
) |
|
803 |
) |
|
804 |
} |
|
805 | ||
806 | ! |
if (add_dens_var) { |
807 | ! |
plot_call <- substitute( |
808 | ! |
expr = plot_call + |
809 | ! |
stat_density( |
810 | ! |
aes(y = after_stat(const * m_type2)), |
811 | ! |
geom = "line", |
812 | ! |
position = "identity", |
813 | ! |
alpha = 0.5, |
814 | ! |
size = 2, |
815 | ! |
n = ndensity |
816 |
), |
|
817 | ! |
env = list( |
818 | ! |
plot_call = plot_call, |
819 | ! |
const = if (main_type_var == "Density") { |
820 | ! |
1 |
821 |
} else { |
|
822 | ! |
diff(range(qenv[["ANL"]][[dist_var]], na.rm = TRUE)) / bins_var |
823 |
}, |
|
824 | ! |
m_type2 = if (main_type_var == "Density") as.name("density") else as.name("count"), |
825 | ! |
ndensity = ndensity |
826 |
) |
|
827 |
) |
|
828 |
} |
|
829 | ||
830 | ! |
if (length(t_dist) != 0 && main_type_var == "Density" && length(g_var) == 0 && length(s_var) == 0) { |
831 | ! |
qenv <- teal.code::eval_code( |
832 | ! |
qenv, |
833 | ! |
substitute( |
834 | ! |
df_params <- as.data.frame(append(params, list(name = t_dist))), |
835 | ! |
env = list(t_dist = t_dist) |
836 |
) |
|
837 |
) |
|
838 | ! |
datas <- quote(data.frame(x = 0.7, y = 1, tb = I(list(df_params = df_params)))) |
839 | ! |
label <- quote(tb) |
840 | ||
841 | ! |
plot_call <- substitute( |
842 | ! |
expr = plot_call + ggpp::geom_table_npc( |
843 | ! |
data = data, |
844 | ! |
aes(npcx = x, npcy = y, label = label), |
845 | ! |
hjust = 0, vjust = 1, size = 4 |
846 |
), |
|
847 | ! |
env = list(plot_call = plot_call, data = datas, label = label) |
848 |
) |
|
849 |
} |
|
850 | ||
851 | ! |
if ( |
852 | ! |
length(s_var) == 0 && |
853 | ! |
length(g_var) == 0 && |
854 | ! |
main_type_var == "Density" && |
855 | ! |
length(t_dist) != 0 && |
856 | ! |
main_type_var == "Density" |
857 |
) { |
|
858 | ! |
map_dist <- stats::setNames( |
859 | ! |
c("dnorm", "dlnorm", "dgamma", "dunif"), |
860 | ! |
c("normal", "lognormal", "gamma", "unif") |
861 |
) |
|
862 | ! |
plot_call <- substitute( |
863 | ! |
expr = plot_call + stat_function( |
864 | ! |
data = data.frame(x = range(ANL[[dist_var]]), color = mapped_dist), |
865 | ! |
aes(x, color = color), |
866 | ! |
fun = mapped_dist_name, |
867 | ! |
n = ndensity, |
868 | ! |
size = 2, |
869 | ! |
args = params |
870 |
) + |
|
871 | ! |
scale_color_manual(values = stats::setNames("blue", mapped_dist), aesthetics = "color"), |
872 | ! |
env = list( |
873 | ! |
plot_call = plot_call, |
874 | ! |
dist_var = dist_var, |
875 | ! |
ndensity = ndensity, |
876 | ! |
mapped_dist = unname(map_dist[t_dist]), |
877 | ! |
mapped_dist_name = as.name(unname(map_dist[t_dist])) |
878 |
) |
|
879 |
) |
|
880 |
} |
|
881 | ||
882 | ! |
all_ggplot2_args <- teal.widgets::resolve_ggplot2_args( |
883 | ! |
user_plot = ggplot2_args[["Histogram"]], |
884 | ! |
user_default = ggplot2_args$default |
885 |
) |
|
886 | ||
887 | ! |
parsed_ggplot2_args <- teal.widgets::parse_ggplot2_args( |
888 | ! |
all_ggplot2_args, |
889 | ! |
ggtheme = ggtheme |
890 |
) |
|
891 | ||
892 | ! |
teal.code::eval_code( |
893 | ! |
qenv, |
894 | ! |
substitute( |
895 | ! |
expr = { |
896 | ! |
g <- plot_call |
897 | ! |
print(g) |
898 |
}, |
|
899 | ! |
env = list(plot_call = Reduce(function(x, y) call("+", x, y), c(plot_call, parsed_ggplot2_args))) |
900 |
) |
|
901 |
) |
|
902 |
} |
|
903 |
) |
|
904 | ||
905 |
# qqplot qenv ---- |
|
906 | ! |
qq_q <- eventReactive( |
907 | ! |
eventExpr = { |
908 | ! |
common_q() |
909 | ! |
input$scales_type |
910 | ! |
input$qq_line |
911 | ! |
is.null(input$ggtheme) |
912 |
}, |
|
913 | ! |
valueExpr = { |
914 | ! |
dist_var <- merge_vars()$dist_var |
915 | ! |
s_var <- merge_vars()$s_var |
916 | ! |
g_var <- merge_vars()$g_var |
917 | ! |
dist_var_name <- merge_vars()$dist_var_name |
918 | ! |
s_var_name <- merge_vars()$s_var_name |
919 | ! |
g_var_name <- merge_vars()$g_var_name |
920 | ! |
t_dist <- input$t_dist |
921 | ! |
dist_param1 <- input$dist_param1 |
922 | ! |
dist_param2 <- input$dist_param2 |
923 | ||
924 | ! |
scales_type <- input$scales_type |
925 | ! |
ggtheme <- input$ggtheme |
926 | ||
927 | ! |
teal::validate_inputs(iv_r_dist(), iv_dist) |
928 | ||
929 | ! |
qenv <- common_q() |
930 | ||
931 | ! |
plot_call <- if (length(s_var) == 0 && length(g_var) == 0) { |
932 | ! |
substitute( |
933 | ! |
expr = ggplot(ANL, aes_string(sample = dist_var)), |
934 | ! |
env = list(dist_var = dist_var) |
935 |
) |
|
936 | ! |
} else if (length(s_var) != 0 && length(g_var) == 0) { |
937 | ! |
substitute( |
938 | ! |
expr = ggplot(ANL, aes_string(sample = dist_var, color = s_var)), |
939 | ! |
env = list(dist_var = dist_var, s_var = s_var) |
940 |
) |
|
941 | ! |
} else if (length(s_var) == 0 && length(g_var) != 0) { |
942 | ! |
substitute( |
943 | ! |
expr = ggplot(ANL[ANL[[g_var]] != "NA", ], aes_string(sample = dist_var)) + |
944 | ! |
facet_wrap(~g_var_name, ncol = 1, scales = scales_raw), |
945 | ! |
env = list( |
946 | ! |
dist_var = dist_var, |
947 | ! |
g_var = g_var, |
948 | ! |
g_var_name = g_var_name, |
949 | ! |
scales_raw = tolower(scales_type) |
950 |
) |
|
951 |
) |
|
952 |
} else { |
|
953 | ! |
substitute( |
954 | ! |
expr = ggplot(ANL[ANL[[g_var]] != "NA", ], aes_string(sample = dist_var, color = s_var)) + |
955 | ! |
facet_wrap(~g_var_name, ncol = 1, scales = scales_raw), |
956 | ! |
env = list( |
957 | ! |
dist_var = dist_var, |
958 | ! |
g_var = g_var, |
959 | ! |
s_var = s_var, |
960 | ! |
g_var_name = g_var_name, |
961 | ! |
scales_raw = tolower(scales_type) |
962 |
) |
|
963 |
) |
|
964 |
} |
|
965 | ||
966 | ! |
map_dist <- stats::setNames( |
967 | ! |
c("qnorm", "qlnorm", "qgamma", "qunif"), |
968 | ! |
c("normal", "lognormal", "gamma", "unif") |
969 |
) |
|
970 | ||
971 | ! |
plot_call <- substitute( |
972 | ! |
expr = plot_call + |
973 | ! |
stat_qq(distribution = mapped_dist, dparams = params), |
974 | ! |
env = list(plot_call = plot_call, mapped_dist = as.name(unname(map_dist[t_dist]))) |
975 |
) |
|
976 | ||
977 | ! |
if (length(t_dist) != 0 && length(g_var) == 0 && length(s_var) == 0) { |
978 | ! |
qenv <- teal.code::eval_code( |
979 | ! |
qenv, |
980 | ! |
substitute( |
981 | ! |
df_params <- as.data.frame(append(params, list(name = t_dist))), |
982 | ! |
env = list(t_dist = t_dist) |
983 |
) |
|
984 |
) |
|
985 | ! |
datas <- quote(data.frame(x = 0.7, y = 1, tb = I(list(df_params = df_params)))) |
986 | ! |
label <- quote(tb) |
987 | ||
988 | ! |
plot_call <- substitute( |
989 | ! |
expr = plot_call + |
990 | ! |
ggpp::geom_table_npc( |
991 | ! |
data = data, |
992 | ! |
aes(npcx = x, npcy = y, label = label), |
993 | ! |
hjust = 0, |
994 | ! |
vjust = 1, |
995 | ! |
size = 4 |
996 |
), |
|
997 | ! |
env = list( |
998 | ! |
plot_call = plot_call, |
999 | ! |
data = datas, |
1000 | ! |
label = label |
1001 |
) |
|
1002 |
) |
|
1003 |
} |
|
1004 | ||
1005 | ! |
if (isTRUE(input$qq_line)) { |
1006 | ! |
plot_call <- substitute( |
1007 | ! |
expr = plot_call + |
1008 | ! |
stat_qq_line(distribution = mapped_dist, dparams = params), |
1009 | ! |
env = list(plot_call = plot_call, mapped_dist = as.name(unname(map_dist[t_dist]))) |
1010 |
) |
|
1011 |
} |
|
1012 | ||
1013 | ! |
all_ggplot2_args <- teal.widgets::resolve_ggplot2_args( |
1014 | ! |
user_plot = ggplot2_args[["QQplot"]], |
1015 | ! |
user_default = ggplot2_args$default, |
1016 | ! |
module_plot = teal.widgets::ggplot2_args(labs = list(x = "theoretical", y = "sample")) |
1017 |
) |
|
1018 | ||
1019 | ! |
parsed_ggplot2_args <- teal.widgets::parse_ggplot2_args( |
1020 | ! |
all_ggplot2_args, |
1021 | ! |
ggtheme = ggtheme |
1022 |
) |
|
1023 | ||
1024 | ! |
teal.code::eval_code( |
1025 | ! |
qenv, |
1026 | ! |
substitute( |
1027 | ! |
expr = { |
1028 | ! |
g <- plot_call |
1029 | ! |
print(g) |
1030 |
}, |
|
1031 | ! |
env = list(plot_call = Reduce(function(x, y) call("+", x, y), c(plot_call, parsed_ggplot2_args))) |
1032 |
) |
|
1033 |
) |
|
1034 |
} |
|
1035 |
) |
|
1036 | ||
1037 |
# test qenv ---- |
|
1038 | ! |
test_q <- eventReactive( |
1039 | ! |
ignoreNULL = FALSE, |
1040 | ! |
eventExpr = { |
1041 | ! |
common_q() |
1042 | ! |
input$dist_param1 |
1043 | ! |
input$dist_param2 |
1044 | ! |
input$dist_tests |
1045 |
}, |
|
1046 | ! |
valueExpr = { |
1047 |
# Create a private stack for this function only. |
|
1048 | ! |
ANL <- common_q()[["ANL"]] |
1049 | ||
1050 | ! |
dist_var <- merge_vars()$dist_var |
1051 | ! |
s_var <- merge_vars()$s_var |
1052 | ! |
g_var <- merge_vars()$g_var |
1053 | ||
1054 | ! |
dist_var_name <- merge_vars()$dist_var_name |
1055 | ! |
s_var_name <- merge_vars()$s_var_name |
1056 | ! |
g_var_name <- merge_vars()$g_var_name |
1057 | ||
1058 | ! |
dist_param1 <- input$dist_param1 |
1059 | ! |
dist_param2 <- input$dist_param2 |
1060 | ! |
dist_tests <- input$dist_tests |
1061 | ! |
t_dist <- input$t_dist |
1062 | ||
1063 | ! |
validate(need(dist_tests, "Please select a test")) |
1064 | ||
1065 | ! |
teal::validate_inputs(iv_dist) |
1066 | ||
1067 | ! |
if (length(s_var) > 0 || length(g_var) > 0) { |
1068 | ! |
counts <- ANL %>% |
1069 | ! |
dplyr::group_by_at(dplyr::vars(dplyr::any_of(c(s_var, g_var)))) %>% |
1070 | ! |
dplyr::summarise(n = dplyr::n()) |
1071 | ||
1072 | ! |
validate(need(all(counts$n > 5), "Please select strata*group with at least 5 observation each.")) |
1073 |
} |
|
1074 | ||
1075 | ||
1076 | ! |
if (dist_tests %in% c( |
1077 | ! |
"t-test (two-samples, not paired)", |
1078 | ! |
"F-test", |
1079 | ! |
"Kolmogorov-Smirnov (two-samples)" |
1080 |
)) { |
|
1081 | ! |
if (length(g_var) == 0 && length(s_var) > 0) { |
1082 | ! |
validate(need( |
1083 | ! |
length(unique(ANL[[s_var]])) == 2, |
1084 | ! |
"Please select stratify variable with 2 levels." |
1085 |
)) |
|
1086 |
} |
|
1087 | ! |
if (length(g_var) > 0 && length(s_var) > 0) { |
1088 | ! |
validate(need( |
1089 | ! |
all(stats::na.omit(as.vector( |
1090 | ! |
tapply(ANL[[s_var]], list(ANL[[g_var]]), function(x) length(unique(x))) == 2 |
1091 |
))), |
|
1092 | ! |
"Please select stratify variable with 2 levels, per each group." |
1093 |
)) |
|
1094 |
} |
|
1095 |
} |
|
1096 | ||
1097 | ! |
map_dist <- stats::setNames( |
1098 | ! |
c("pnorm", "plnorm", "pgamma", "punif"), |
1099 | ! |
c("normal", "lognormal", "gamma", "unif") |
1100 |
) |
|
1101 | ! |
sks_args <- list( |
1102 | ! |
test = quote(stats::ks.test), |
1103 | ! |
args = bquote(append(list(.[[.(dist_var)]], .(map_dist[t_dist])), params)), |
1104 | ! |
groups = c(g_var, s_var) |
1105 |
) |
|
1106 | ! |
ssw_args <- list( |
1107 | ! |
test = quote(stats::shapiro.test), |
1108 | ! |
args = bquote(list(.[[.(dist_var)]])), |
1109 | ! |
groups = c(g_var, s_var) |
1110 |
) |
|
1111 | ! |
mfil_args <- list( |
1112 | ! |
test = quote(stats::fligner.test), |
1113 | ! |
args = bquote(list(.[[.(dist_var)]], .[[.(s_var)]])), |
1114 | ! |
groups = c(g_var) |
1115 |
) |
|
1116 | ! |
sad_args <- list( |
1117 | ! |
test = quote(goftest::ad.test), |
1118 | ! |
args = bquote(append(list(.[[.(dist_var)]], .(map_dist[t_dist])), params)), |
1119 | ! |
groups = c(g_var, s_var) |
1120 |
) |
|
1121 | ! |
scvm_args <- list( |
1122 | ! |
test = quote(goftest::cvm.test), |
1123 | ! |
args = bquote(append(list(.[[.(dist_var)]], .(map_dist[t_dist])), params)), |
1124 | ! |
groups = c(g_var, s_var) |
1125 |
) |
|
1126 | ! |
manov_args <- list( |
1127 | ! |
test = quote(stats::aov), |
1128 | ! |
args = bquote(list(stats::formula(.(dist_var_name) ~ .(s_var_name)), .)), |
1129 | ! |
groups = c(g_var) |
1130 |
) |
|
1131 | ! |
mt_args <- list( |
1132 | ! |
test = quote(stats::t.test), |
1133 | ! |
args = bquote(unname(split(.[[.(dist_var)]], .[[.(s_var)]], drop = TRUE))), |
1134 | ! |
groups = c(g_var) |
1135 |
) |
|
1136 | ! |
mv_args <- list( |
1137 | ! |
test = quote(stats::var.test), |
1138 | ! |
args = bquote(unname(split(.[[.(dist_var)]], .[[.(s_var)]], drop = TRUE))), |
1139 | ! |
groups = c(g_var) |
1140 |
) |
|
1141 | ! |
mks_args <- list( |
1142 | ! |
test = quote(stats::ks.test), |
1143 | ! |
args = bquote(unname(split(.[[.(dist_var)]], .[[.(s_var)]], drop = TRUE))), |
1144 | ! |
groups = c(g_var) |
1145 |
) |
|
1146 | ||
1147 | ! |
tests_base <- switch(dist_tests, |
1148 | ! |
"Kolmogorov-Smirnov (one-sample)" = sks_args, |
1149 | ! |
"Shapiro-Wilk" = ssw_args, |
1150 | ! |
"Fligner-Killeen" = mfil_args, |
1151 | ! |
"one-way ANOVA" = manov_args, |
1152 | ! |
"t-test (two-samples, not paired)" = mt_args, |
1153 | ! |
"F-test" = mv_args, |
1154 | ! |
"Kolmogorov-Smirnov (two-samples)" = mks_args, |
1155 | ! |
"Anderson-Darling (one-sample)" = sad_args, |
1156 | ! |
"Cramer-von Mises (one-sample)" = scvm_args |
1157 |
) |
|
1158 | ||
1159 | ! |
env <- list( |
1160 | ! |
t_test = t_dist, |
1161 | ! |
dist_var = dist_var, |
1162 | ! |
g_var = g_var, |
1163 | ! |
s_var = s_var, |
1164 | ! |
args = tests_base$args, |
1165 | ! |
groups = tests_base$groups, |
1166 | ! |
test = tests_base$test, |
1167 | ! |
dist_var_name = dist_var_name, |
1168 | ! |
g_var_name = g_var_name, |
1169 | ! |
s_var_name = s_var_name |
1170 |
) |
|
1171 | ||
1172 | ! |
qenv <- common_q() |
1173 | ||
1174 | ! |
if (length(s_var) == 0 && length(g_var) == 0) { |
1175 | ! |
qenv <- teal.code::eval_code( |
1176 | ! |
qenv, |
1177 | ! |
substitute( |
1178 | ! |
expr = { |
1179 | ! |
test_stats <- ANL %>% |
1180 | ! |
dplyr::select(dist_var) %>% |
1181 | ! |
with(., broom::glance(do.call(test, args))) %>% |
1182 | ! |
dplyr::mutate_if(is.numeric, round, 3) |
1183 |
}, |
|
1184 | ! |
env = env |
1185 |
) |
|
1186 |
) |
|
1187 |
} else { |
|
1188 | ! |
qenv <- teal.code::eval_code( |
1189 | ! |
qenv, |
1190 | ! |
substitute( |
1191 | ! |
expr = { |
1192 | ! |
test_stats <- ANL %>% |
1193 | ! |
dplyr::select(dist_var, s_var, g_var) %>% |
1194 | ! |
dplyr::group_by_at(dplyr::vars(dplyr::any_of(groups))) %>% |
1195 | ! |
dplyr::do(tests = broom::glance(do.call(test, args))) %>% |
1196 | ! |
tidyr::unnest(tests) %>% |
1197 | ! |
dplyr::mutate_if(is.numeric, round, 3) |
1198 |
}, |
|
1199 | ! |
env = env |
1200 |
) |
|
1201 |
) |
|
1202 |
} |
|
1203 | ! |
qenv %>% |
1204 |
# used to display table when running show-r-code code |
|
1205 | ! |
teal.code::eval_code(quote(test_stats)) |
1206 |
} |
|
1207 |
) |
|
1208 | ||
1209 |
# outputs ---- |
|
1210 |
## building main qenv |
|
1211 | ! |
output_q <- reactive({ |
1212 | ! |
tab <- input$tabs |
1213 | ! |
req(tab) # tab is NULL upon app launch, hence will crash without this statement |
1214 | ||
1215 | ! |
qenv_final <- common_q() |
1216 |
# wrapped in if since could lead into validate error - we do want to continue |
|
1217 | ! |
test_r_qenv_out <- try(test_q(), silent = TRUE) |
1218 | ! |
if (!inherits(test_r_qenv_out, c("try-error", "error"))) { |
1219 | ! |
qenv_final <- teal.code::join(qenv_final, test_q()) |
1220 |
} |
|
1221 | ||
1222 | ! |
qenv_final <- if (tab == "Histogram") { |
1223 | ! |
req(dist_q()) |
1224 | ! |
teal.code::join(qenv_final, dist_q()) |
1225 | ! |
} else if (tab == "QQplot") { |
1226 | ! |
req(qq_q()) |
1227 | ! |
teal.code::join(qenv_final, qq_q()) |
1228 |
} |
|
1229 | ! |
qenv_final |
1230 |
}) |
|
1231 | ||
1232 | ! |
dist_r <- reactive(dist_q()[["g"]]) |
1233 | ||
1234 | ! |
qq_r <- reactive(qq_q()[["g"]]) |
1235 | ||
1236 | ! |
output$summary_table <- DT::renderDataTable( |
1237 | ! |
expr = if (iv_r()$is_valid()) common_q()[["summary_table"]] else NULL, |
1238 | ! |
options = list( |
1239 | ! |
autoWidth = TRUE, |
1240 | ! |
columnDefs = list(list(width = "200px", targets = "_all")) |
1241 |
), |
|
1242 | ! |
rownames = FALSE |
1243 |
) |
|
1244 | ||
1245 | ! |
tests_r <- reactive({ |
1246 | ! |
req(iv_r()$is_valid()) |
1247 | ! |
teal::validate_inputs(iv_r_dist()) |
1248 | ! |
test_q()[["test_stats"]] |
1249 |
}) |
|
1250 | ||
1251 | ! |
pws1 <- teal.widgets::plot_with_settings_srv( |
1252 | ! |
id = "hist_plot", |
1253 | ! |
plot_r = dist_r, |
1254 | ! |
height = plot_height, |
1255 | ! |
width = plot_width, |
1256 | ! |
brushing = FALSE |
1257 |
) |
|
1258 | ||
1259 | ! |
pws2 <- teal.widgets::plot_with_settings_srv( |
1260 | ! |
id = "qq_plot", |
1261 | ! |
plot_r = qq_r, |
1262 | ! |
height = plot_height, |
1263 | ! |
width = plot_width, |
1264 | ! |
brushing = FALSE |
1265 |
) |
|
1266 | ||
1267 | ! |
output$t_stats <- DT::renderDataTable( |
1268 | ! |
expr = tests_r(), |
1269 | ! |
options = list(scrollX = TRUE), |
1270 | ! |
rownames = FALSE |
1271 |
) |
|
1272 | ||
1273 | ! |
teal.widgets::verbatim_popup_srv( |
1274 | ! |
id = "rcode", |
1275 | ! |
verbatim_content = reactive(teal.code::get_code(output_q())), |
1276 | ! |
title = "R Code for distribution" |
1277 |
) |
|
1278 | ||
1279 |
### REPORTER |
|
1280 | ! |
if (with_reporter) { |
1281 | ! |
card_fun <- function(comment, label) { |
1282 | ! |
card <- teal::report_card_template( |
1283 | ! |
title = "Distribution Plot", |
1284 | ! |
label = label, |
1285 | ! |
with_filter = with_filter, |
1286 | ! |
filter_panel_api = filter_panel_api |
1287 |
) |
|
1288 | ! |
card$append_text("Plot", "header3") |
1289 | ! |
if (input$tabs == "Histogram") { |
1290 | ! |
card$append_plot(dist_r(), dim = pws1$dim()) |
1291 | ! |
} else if (input$tabs == "QQplot") { |
1292 | ! |
card$append_plot(qq_r(), dim = pws2$dim()) |
1293 |
} |
|
1294 | ! |
card$append_text("Statistics table", "header3") |
1295 | ||
1296 | ! |
card$append_table(common_q()[["summary_table"]]) |
1297 | ! |
tests_error <- tryCatch(expr = tests_r(), error = function(e) "error") |
1298 | ! |
if (inherits(tests_error, "data.frame")) { |
1299 | ! |
card$append_text("Tests table", "header3") |
1300 | ! |
card$append_table(tests_r()) |
1301 |
} |
|
1302 | ||
1303 | ! |
if (!comment == "") { |
1304 | ! |
card$append_text("Comment", "header3") |
1305 | ! |
card$append_text(comment) |
1306 |
} |
|
1307 | ! |
card$append_src(teal.code::get_code(output_q())) |
1308 | ! |
card |
1309 |
} |
|
1310 | ! |
teal.reporter::simple_reporter_srv("simple_reporter", reporter = reporter, card_fun = card_fun) |
1311 |
} |
|
1312 |
### |
|
1313 |
}) |
|
1314 |
} |
1 |
#' `teal` module: Variable browser |
|
2 |
#' |
|
3 |
#' Module provides provides a detailed summary and visualization of variable distributions |
|
4 |
#' for `data.frame` objects, with interactive features to customize analysis. |
|
5 |
#' |
|
6 |
#' Numeric columns with fewer than 30 distinct values can be treated as either discrete |
|
7 |
#' or continuous with a checkbox allowing users to switch how they are treated(if < 6 unique values |
|
8 |
#' then the default is discrete, otherwise it is continuous). |
|
9 |
#' |
|
10 |
#' @inheritParams teal::module |
|
11 |
#' @inheritParams shared_params |
|
12 |
#' @param parent_dataname (`character(1)`) string specifying a parent dataset. |
|
13 |
#' If it exists in `datasets_selected`then an extra checkbox will be shown to |
|
14 |
#' allow users to not show variables in other datasets which exist in this `dataname`. |
|
15 |
#' This is typically used to remove `ADSL` columns in `CDISC` data. |
|
16 |
#' In non `CDISC` data this can be ignored. Defaults to `"ADSL"`. |
|
17 |
#' @param datasets_selected (`character`) vector of datasets which should be |
|
18 |
#' shown, in order. Names must correspond with datasets names. |
|
19 |
#' If vector of length zero (default) then all datasets are shown. |
|
20 |
#' Note: Only `data.frame` objects are compatible; using other types will cause an error. |
|
21 |
#' |
|
22 |
#' @inherit shared_params return |
|
23 |
#' |
|
24 |
#' @examplesShinylive |
|
25 |
#' library(teal.modules.general) |
|
26 |
#' interactive <- function() TRUE |
|
27 |
#' {{ next_example }} |
|
28 |
# nolint start: line_length_linter. |
|
29 |
#' @examplesIf require("sparkline", quietly = TRUE) && require("htmlwidgets", quietly = TRUE) && require("jsonlite", quietly = TRUE) |
|
30 |
# nolint end: line_length_linter. |
|
31 |
#' # general data example |
|
32 |
#' data <- teal_data() |
|
33 |
#' data <- within(data, { |
|
34 |
#' iris <- iris |
|
35 |
#' mtcars <- mtcars |
|
36 |
#' women <- women |
|
37 |
#' faithful <- faithful |
|
38 |
#' CO2 <- CO2 |
|
39 |
#' }) |
|
40 |
#' datanames(data) <- c("iris", "mtcars", "women", "faithful", "CO2") |
|
41 |
#' |
|
42 |
#' app <- init( |
|
43 |
#' data = data, |
|
44 |
#' modules = modules( |
|
45 |
#' tm_variable_browser( |
|
46 |
#' label = "Variable browser" |
|
47 |
#' ) |
|
48 |
#' ) |
|
49 |
#' ) |
|
50 |
#' if (interactive()) { |
|
51 |
#' shinyApp(app$ui, app$server) |
|
52 |
#' } |
|
53 |
#' |
|
54 |
#' @examplesShinylive |
|
55 |
#' library(teal.modules.general) |
|
56 |
#' interactive <- function() TRUE |
|
57 |
#' {{ next_example }} |
|
58 |
# nolint start: line_length_linter. |
|
59 |
#' @examplesIf require("sparkline", quietly = TRUE) && require("htmlwidgets", quietly = TRUE) && require("jsonlite", quietly = TRUE) |
|
60 |
# nolint end: line_length_linter. |
|
61 |
#' # CDISC example data |
|
62 |
#' library(sparkline) |
|
63 |
#' data <- teal_data() |
|
64 |
#' data <- within(data, { |
|
65 |
#' ADSL <- rADSL |
|
66 |
#' ADTTE <- rADTTE |
|
67 |
#' }) |
|
68 |
#' datanames(data) <- c("ADSL", "ADTTE") |
|
69 |
#' join_keys(data) <- default_cdisc_join_keys[datanames(data)] |
|
70 |
#' |
|
71 |
#' app <- init( |
|
72 |
#' data = data, |
|
73 |
#' modules = modules( |
|
74 |
#' tm_variable_browser( |
|
75 |
#' label = "Variable browser" |
|
76 |
#' ) |
|
77 |
#' ) |
|
78 |
#' ) |
|
79 |
#' if (interactive()) { |
|
80 |
#' shinyApp(app$ui, app$server) |
|
81 |
#' } |
|
82 |
#' |
|
83 |
#' @export |
|
84 |
#' |
|
85 |
tm_variable_browser <- function(label = "Variable Browser", |
|
86 |
datasets_selected = character(0), |
|
87 |
parent_dataname = "ADSL", |
|
88 |
pre_output = NULL, |
|
89 |
post_output = NULL, |
|
90 |
ggplot2_args = teal.widgets::ggplot2_args()) { |
|
91 | ! |
message("Initializing tm_variable_browser") |
92 | ||
93 |
# Requires Suggested packages |
|
94 | ! |
if (!requireNamespace("sparkline", quietly = TRUE)) { |
95 | ! |
stop("Cannot load sparkline - please install the package or restart your session.") |
96 |
} |
|
97 | ! |
if (!requireNamespace("htmlwidgets", quietly = TRUE)) { |
98 | ! |
stop("Cannot load htmlwidgets - please install the package or restart your session.") |
99 |
} |
|
100 | ! |
if (!requireNamespace("jsonlite", quietly = TRUE)) { |
101 | ! |
stop("Cannot load jsonlite - please install the package or restart your session.") |
102 |
} |
|
103 | ||
104 |
# Start of assertions |
|
105 | ! |
checkmate::assert_string(label) |
106 | ! |
checkmate::assert_character(datasets_selected) |
107 | ! |
checkmate::assert_character(parent_dataname, min.len = 0, max.len = 1) |
108 | ! |
checkmate::assert_multi_class(pre_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE) |
109 | ! |
checkmate::assert_multi_class(post_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE) |
110 | ! |
checkmate::assert_class(ggplot2_args, "ggplot2_args") |
111 |
# End of assertions |
|
112 | ||
113 | ! |
datasets_selected <- unique(datasets_selected) |
114 | ||
115 | ! |
ans <- module( |
116 | ! |
label, |
117 | ! |
server = srv_variable_browser, |
118 | ! |
ui = ui_variable_browser, |
119 | ! |
datanames = "all", |
120 | ! |
server_args = list( |
121 | ! |
datasets_selected = datasets_selected, |
122 | ! |
parent_dataname = parent_dataname, |
123 | ! |
ggplot2_args = ggplot2_args |
124 |
), |
|
125 | ! |
ui_args = list( |
126 | ! |
pre_output = pre_output, |
127 | ! |
post_output = post_output |
128 |
) |
|
129 |
) |
|
130 |
# `shiny` inputs are stored properly but the majority of the module is state of `datatable` which is not stored. |
|
131 | ! |
attr(ans, "teal_bookmarkable") <- NULL |
132 | ! |
ans |
133 |
} |
|
134 | ||
135 |
# UI function for the variable browser module |
|
136 |
ui_variable_browser <- function(id, |
|
137 |
pre_output = NULL, |
|
138 |
post_output = NULL) { |
|
139 | ! |
ns <- NS(id) |
140 | ||
141 | ! |
tagList( |
142 | ! |
include_css_files("custom"), |
143 | ! |
shinyjs::useShinyjs(), |
144 | ! |
teal.widgets::standard_layout( |
145 | ! |
output = fluidRow( |
146 | ! |
htmlwidgets::getDependency("sparkline"), # needed for sparklines to work |
147 | ! |
column( |
148 | ! |
6, |
149 |
# variable browser |
|
150 | ! |
teal.widgets::white_small_well( |
151 | ! |
uiOutput(ns("ui_variable_browser")), |
152 | ! |
shinyjs::hidden({ |
153 | ! |
checkboxInput(ns("show_parent_vars"), "Show parent dataset variables", value = FALSE) |
154 |
}) |
|
155 |
) |
|
156 |
), |
|
157 | ! |
column( |
158 | ! |
6, |
159 | ! |
teal.widgets::white_small_well( |
160 |
### Reporter |
|
161 | ! |
teal.reporter::simple_reporter_ui(ns("simple_reporter")), |
162 |
### |
|
163 | ! |
tags$div( |
164 | ! |
class = "block", |
165 | ! |
uiOutput(ns("ui_histogram_display")) |
166 |
), |
|
167 | ! |
tags$div( |
168 | ! |
class = "block", |
169 | ! |
uiOutput(ns("ui_numeric_display")) |
170 |
), |
|
171 | ! |
teal.widgets::plot_with_settings_ui(ns("variable_plot")), |
172 | ! |
tags$br(), |
173 |
# input user-defined text size |
|
174 | ! |
teal.widgets::panel_item( |
175 | ! |
title = "Plot settings", |
176 | ! |
collapsed = TRUE, |
177 | ! |
selectInput( |
178 | ! |
inputId = ns("ggplot_theme"), label = "ggplot2 theme", |
179 | ! |
choices = ggplot_themes, |
180 | ! |
selected = "grey" |
181 |
), |
|
182 | ! |
fluidRow( |
183 | ! |
column(6, sliderInput( |
184 | ! |
inputId = ns("font_size"), label = "font size", |
185 | ! |
min = 5L, max = 30L, value = 15L, step = 1L, ticks = FALSE |
186 |
)), |
|
187 | ! |
column(6, sliderInput( |
188 | ! |
inputId = ns("label_rotation"), label = "rotate x labels", |
189 | ! |
min = 0L, max = 90L, value = 45L, step = 1, ticks = FALSE |
190 |
)) |
|
191 |
) |
|
192 |
), |
|
193 | ! |
tags$br(), |
194 | ! |
teal.widgets::get_dt_rows(ns("variable_summary_table"), ns("variable_summary_table_rows")), |
195 | ! |
DT::dataTableOutput(ns("variable_summary_table")) |
196 |
) |
|
197 |
) |
|
198 |
), |
|
199 | ! |
pre_output = pre_output, |
200 | ! |
post_output = post_output |
201 |
) |
|
202 |
) |
|
203 |
} |
|
204 | ||
205 |
# Server function for the variable browser module |
|
206 |
srv_variable_browser <- function(id, |
|
207 |
data, |
|
208 |
reporter, |
|
209 |
filter_panel_api, |
|
210 |
datasets_selected, parent_dataname, ggplot2_args) { |
|
211 | ! |
with_reporter <- !missing(reporter) && inherits(reporter, "Reporter") |
212 | ! |
with_filter <- !missing(filter_panel_api) && inherits(filter_panel_api, "FilterPanelAPI") |
213 | ! |
checkmate::assert_class(data, "reactive") |
214 | ! |
checkmate::assert_class(isolate(data()), "teal_data") |
215 | ! |
moduleServer(id, function(input, output, session) { |
216 | ! |
teal.logger::log_shiny_input_changes(input, namespace = "teal.modules.general") |
217 | ||
218 |
# if there are < this number of unique records then a numeric |
|
219 |
# variable can be treated as a factor and all factors with < this groups |
|
220 |
# have their values plotted |
|
221 | ! |
.unique_records_for_factor <- 30 |
222 |
# if there are < this number of unique records then a numeric |
|
223 |
# variable is by default treated as a factor |
|
224 | ! |
.unique_records_default_as_factor <- 6 # nolint: object_length. |
225 | ||
226 | ! |
varname_numeric_as_factor <- reactiveValues() |
227 | ||
228 | ! |
datanames <- isolate(teal.data::datanames(data())) |
229 | ! |
datanames <- Filter(function(name) { |
230 | ! |
is.data.frame(isolate(data())[[name]]) |
231 | ! |
}, datanames) |
232 | ||
233 | ! |
checkmate::assert_character(datasets_selected) |
234 | ! |
checkmate::assert_subset(datasets_selected, datanames) |
235 | ! |
if (!identical(datasets_selected, character(0))) { |
236 | ! |
checkmate::assert_subset(datasets_selected, datanames) |
237 | ! |
datanames <- datasets_selected |
238 |
} |
|
239 | ||
240 | ! |
output$ui_variable_browser <- renderUI({ |
241 | ! |
ns <- session$ns |
242 | ! |
do.call( |
243 | ! |
tabsetPanel, |
244 | ! |
c( |
245 | ! |
id = ns("tabset_panel"), |
246 | ! |
do.call( |
247 | ! |
tagList, |
248 | ! |
lapply(datanames, function(dataname) { |
249 | ! |
tabPanel( |
250 | ! |
dataname, |
251 | ! |
tags$div( |
252 | ! |
class = "mt-4", |
253 | ! |
textOutput(ns(paste0("dataset_summary_", dataname))) |
254 |
), |
|
255 | ! |
tags$div( |
256 | ! |
class = "mt-4", |
257 | ! |
teal.widgets::get_dt_rows( |
258 | ! |
ns(paste0("variable_browser_", dataname)), |
259 | ! |
ns(paste0("variable_browser_", dataname, "_rows")) |
260 |
), |
|
261 | ! |
DT::dataTableOutput(ns(paste0("variable_browser_", dataname)), width = "100%") |
262 |
) |
|
263 |
) |
|
264 |
}) |
|
265 |
) |
|
266 |
) |
|
267 |
) |
|
268 |
}) |
|
269 | ||
270 |
# conditionally display checkbox |
|
271 | ! |
shinyjs::toggle( |
272 | ! |
id = "show_parent_vars", |
273 | ! |
condition = length(parent_dataname) > 0 && parent_dataname %in% datanames |
274 |
) |
|
275 | ||
276 | ! |
columns_names <- new.env() |
277 | ||
278 |
# plot_var$data holds the name of the currently selected dataset |
|
279 |
# plot_var$variable[[<dataset_name>]] holds the name of the currently selected |
|
280 |
# variable for dataset <dataset_name> |
|
281 | ! |
plot_var <- reactiveValues(data = NULL, variable = list()) |
282 | ||
283 | ! |
establish_updating_selection(datanames, input, plot_var, columns_names) |
284 | ||
285 |
# validations |
|
286 | ! |
validation_checks <- validate_input(input, plot_var, data) |
287 | ||
288 |
# data_for_analysis is a list with two elements: a column from a dataset and the column label |
|
289 | ! |
plotted_data <- reactive({ |
290 | ! |
validation_checks() |
291 | ||
292 | ! |
get_plotted_data(input, plot_var, data) |
293 |
}) |
|
294 | ||
295 | ! |
treat_numeric_as_factor <- reactive({ |
296 | ! |
if (is_num_var_short(.unique_records_for_factor, input, plotted_data)) { |
297 | ! |
input$numeric_as_factor |
298 |
} else { |
|
299 | ! |
FALSE |
300 |
} |
|
301 |
}) |
|
302 | ||
303 | ! |
render_tabset_panel_content( |
304 | ! |
input = input, |
305 | ! |
output = output, |
306 | ! |
data = data, |
307 | ! |
datanames = datanames, |
308 | ! |
parent_dataname = parent_dataname, |
309 | ! |
columns_names = columns_names, |
310 | ! |
plot_var = plot_var |
311 |
) |
|
312 |
# add used-defined text size to ggplot arguments passed from caller frame |
|
313 | ! |
all_ggplot2_args <- reactive({ |
314 | ! |
user_text <- teal.widgets::ggplot2_args( |
315 | ! |
theme = list( |
316 | ! |
"text" = ggplot2::element_text(size = input[["font_size"]]), |
317 | ! |
"axis.text.x" = ggplot2::element_text(angle = input[["label_rotation"]], hjust = 1) |
318 |
) |
|
319 |
) |
|
320 | ! |
user_theme <- utils::getFromNamespace(sprintf("theme_%s", input[["ggplot_theme"]]), ns = "ggplot2") |
321 | ! |
user_theme <- user_theme() |
322 |
# temporary fix to circumvent assertion issue with resolve_ggplot2_args |
|
323 |
# drop problematic elements |
|
324 | ! |
user_theme <- user_theme[grep("strip.text.y.left", names(user_theme), fixed = TRUE, invert = TRUE)] |
325 | ||
326 | ! |
teal.widgets::resolve_ggplot2_args( |
327 | ! |
user_plot = user_text, |
328 | ! |
user_default = teal.widgets::ggplot2_args(theme = user_theme), |
329 | ! |
module_plot = ggplot2_args |
330 |
) |
|
331 |
}) |
|
332 | ||
333 | ! |
output$ui_numeric_display <- renderUI({ |
334 | ! |
validation_checks() |
335 | ! |
dataname <- input$tabset_panel |
336 | ! |
varname <- plot_var$variable[[dataname]] |
337 | ! |
df <- data()[[dataname]] |
338 | ||
339 | ! |
numeric_ui <- tagList( |
340 | ! |
fluidRow( |
341 | ! |
tags$div( |
342 | ! |
class = "col-md-4", |
343 | ! |
tags$br(), |
344 | ! |
shinyWidgets::switchInput( |
345 | ! |
inputId = session$ns("display_density"), |
346 | ! |
label = "Show density", |
347 | ! |
value = `if`(is.null(isolate(input$display_density)), TRUE, isolate(input$display_density)), |
348 | ! |
width = "50%", |
349 | ! |
labelWidth = "100px", |
350 | ! |
handleWidth = "50px" |
351 |
) |
|
352 |
), |
|
353 | ! |
tags$div( |
354 | ! |
class = "col-md-4", |
355 | ! |
tags$br(), |
356 | ! |
shinyWidgets::switchInput( |
357 | ! |
inputId = session$ns("remove_outliers"), |
358 | ! |
label = "Remove outliers", |
359 | ! |
value = `if`(is.null(isolate(input$remove_outliers)), FALSE, isolate(input$remove_outliers)), |
360 | ! |
width = "50%", |
361 | ! |
labelWidth = "100px", |
362 | ! |
handleWidth = "50px" |
363 |
) |
|
364 |
), |
|
365 | ! |
tags$div( |
366 | ! |
class = "col-md-4", |
367 | ! |
uiOutput(session$ns("outlier_definition_slider_ui")) |
368 |
) |
|
369 |
), |
|
370 | ! |
tags$div( |
371 | ! |
class = "ml-4", |
372 | ! |
uiOutput(session$ns("ui_density_help")), |
373 | ! |
uiOutput(session$ns("ui_outlier_help")) |
374 |
) |
|
375 |
) |
|
376 | ||
377 | ! |
observeEvent(input$numeric_as_factor, ignoreInit = TRUE, { |
378 | ! |
varname_numeric_as_factor[[plot_var$variable[[dataname]]]] <- input$numeric_as_factor |
379 |
}) |
|
380 | ||
381 | ! |
if (is.numeric(df[[varname]])) { |
382 | ! |
unique_entries <- length(unique(df[[varname]])) |
383 | ! |
if (unique_entries < .unique_records_for_factor && unique_entries > 0) { |
384 | ! |
list( |
385 | ! |
checkboxInput( |
386 | ! |
session$ns("numeric_as_factor"), |
387 | ! |
"Treat variable as factor", |
388 | ! |
value = `if`( |
389 | ! |
is.null(varname_numeric_as_factor[[varname]]), |
390 | ! |
unique_entries < .unique_records_default_as_factor, |
391 | ! |
varname_numeric_as_factor[[varname]] |
392 |
) |
|
393 |
), |
|
394 | ! |
conditionalPanel("!input.numeric_as_factor", ns = session$ns, numeric_ui) |
395 |
) |
|
396 | ! |
} else if (unique_entries > 0) { |
397 | ! |
numeric_ui |
398 |
} |
|
399 |
} else { |
|
400 | ! |
NULL |
401 |
} |
|
402 |
}) |
|
403 | ||
404 | ! |
output$ui_histogram_display <- renderUI({ |
405 | ! |
validation_checks() |
406 | ! |
dataname <- input$tabset_panel |
407 | ! |
varname <- plot_var$variable[[dataname]] |
408 | ! |
df <- data()[[dataname]] |
409 | ||
410 | ! |
numeric_ui <- tagList(fluidRow( |
411 | ! |
tags$div( |
412 | ! |
class = "col-md-4", |
413 | ! |
shinyWidgets::switchInput( |
414 | ! |
inputId = session$ns("remove_NA_hist"), |
415 | ! |
label = "Remove NA values", |
416 | ! |
value = FALSE, |
417 | ! |
width = "50%", |
418 | ! |
labelWidth = "100px", |
419 | ! |
handleWidth = "50px" |
420 |
) |
|
421 |
) |
|
422 |
)) |
|
423 | ||
424 | ! |
var <- df[[varname]] |
425 | ! |
if (anyNA(var) && (is.factor(var) || is.character(var) || is.logical(var))) { |
426 | ! |
groups <- unique(as.character(var)) |
427 | ! |
len_groups <- length(groups) |
428 | ! |
if (len_groups >= .unique_records_for_factor) { |
429 | ! |
NULL |
430 |
} else { |
|
431 | ! |
numeric_ui |
432 |
} |
|
433 |
} else { |
|
434 | ! |
NULL |
435 |
} |
|
436 |
}) |
|
437 | ||
438 | ! |
output$outlier_definition_slider_ui <- renderUI({ |
439 | ! |
req(input$remove_outliers) |
440 | ! |
sliderInput( |
441 | ! |
inputId = session$ns("outlier_definition_slider"), |
442 | ! |
tags$div( |
443 | ! |
class = "teal-tooltip", |
444 | ! |
tagList( |
445 | ! |
"Outlier definition:", |
446 | ! |
icon("circle-info"), |
447 | ! |
tags$span( |
448 | ! |
class = "tooltiptext", |
449 | ! |
paste( |
450 | ! |
"Use the slider to choose the cut-off value to define outliers; the larger the value the", |
451 | ! |
"further below Q1/above Q3 points have to be in order to be classed as outliers" |
452 |
) |
|
453 |
) |
|
454 |
) |
|
455 |
), |
|
456 | ! |
min = 1, |
457 | ! |
max = 5, |
458 | ! |
value = 3, |
459 | ! |
step = 0.5 |
460 |
) |
|
461 |
}) |
|
462 | ||
463 | ! |
output$ui_density_help <- renderUI({ |
464 | ! |
req(is.logical(input$display_density)) |
465 | ! |
if (input$display_density) { |
466 | ! |
tags$small(helpText(paste( |
467 | ! |
"Kernel density estimation with gaussian kernel", |
468 | ! |
"and bandwidth function bw.nrd0 (R default)" |
469 |
))) |
|
470 |
} else { |
|
471 | ! |
NULL |
472 |
} |
|
473 |
}) |
|
474 | ||
475 | ! |
output$ui_outlier_help <- renderUI({ |
476 | ! |
req(is.logical(input$remove_outliers), input$outlier_definition_slider) |
477 | ! |
if (input$remove_outliers) { |
478 | ! |
tags$small( |
479 | ! |
helpText( |
480 | ! |
withMathJax(paste0( |
481 | ! |
"Outlier data points (\\( X \\lt Q1 - ", input$outlier_definition_slider, "\\times IQR \\) or |
482 | ! |
\\(Q3 + ", input$outlier_definition_slider, "\\times IQR \\lt X\\)) |
483 | ! |
have not been displayed on the graph and will not be used for any kernel density estimations, ", |
484 | ! |
"although their values remain in the statisics table below." |
485 |
)) |
|
486 |
) |
|
487 |
) |
|
488 |
} else { |
|
489 | ! |
NULL |
490 |
} |
|
491 |
}) |
|
492 | ||
493 | ||
494 | ! |
variable_plot_r <- reactive({ |
495 | ! |
display_density <- `if`(is.null(input$display_density), FALSE, input$display_density) |
496 | ! |
remove_outliers <- `if`(is.null(input$remove_outliers), FALSE, input$remove_outliers) |
497 | ||
498 | ! |
if (remove_outliers) { |
499 | ! |
req(input$outlier_definition_slider) |
500 | ! |
outlier_definition <- as.numeric(input$outlier_definition_slider) |
501 |
} else { |
|
502 | ! |
outlier_definition <- 0 |
503 |
} |
|
504 | ||
505 | ! |
plot_var_summary( |
506 | ! |
var = plotted_data()$data, |
507 | ! |
var_lab = plotted_data()$var_description, |
508 | ! |
wrap_character = 15, |
509 | ! |
numeric_as_factor = treat_numeric_as_factor(), |
510 | ! |
remove_NA_hist = input$remove_NA_hist, |
511 | ! |
display_density = display_density, |
512 | ! |
outlier_definition = outlier_definition, |
513 | ! |
records_for_factor = .unique_records_for_factor, |
514 | ! |
ggplot2_args = all_ggplot2_args() |
515 |
) |
|
516 |
}) |
|
517 | ||
518 | ! |
pws <- teal.widgets::plot_with_settings_srv( |
519 | ! |
id = "variable_plot", |
520 | ! |
plot_r = variable_plot_r, |
521 | ! |
height = c(500, 200, 2000) |
522 |
) |
|
523 | ||
524 | ! |
output$variable_summary_table <- DT::renderDataTable({ |
525 | ! |
var_summary_table( |
526 | ! |
plotted_data()$data, |
527 | ! |
treat_numeric_as_factor(), |
528 | ! |
input$variable_summary_table_rows, |
529 | ! |
if (!is.null(input$remove_outliers) && input$remove_outliers) { |
530 | ! |
req(input$outlier_definition_slider) |
531 | ! |
as.numeric(input$outlier_definition_slider) |
532 |
} else { |
|
533 | ! |
0 |
534 |
} |
|
535 |
) |
|
536 |
}) |
|
537 | ||
538 |
### REPORTER |
|
539 | ! |
if (with_reporter) { |
540 | ! |
card_fun <- function(comment) { |
541 | ! |
card <- teal::TealReportCard$new() |
542 | ! |
card$set_name("Variable Browser Plot") |
543 | ! |
card$append_text("Variable Browser Plot", "header2") |
544 | ! |
if (with_filter) card$append_fs(filter_panel_api$get_filter_state()) |
545 | ! |
card$append_text("Plot", "header3") |
546 | ! |
card$append_plot(variable_plot_r(), dim = pws$dim()) |
547 | ! |
if (!comment == "") { |
548 | ! |
card$append_text("Comment", "header3") |
549 | ! |
card$append_text(comment) |
550 |
} |
|
551 | ! |
card |
552 |
} |
|
553 | ! |
teal.reporter::simple_reporter_srv("simple_reporter", reporter = reporter, card_fun = card_fun) |
554 |
} |
|
555 |
### |
|
556 |
}) |
|
557 |
} |
|
558 | ||
559 |
#' Summarize NAs. |
|
560 |
#' |
|
561 |
#' Summarizes occurrence of missing values in vector. |
|
562 |
#' @param x vector of any type and length |
|
563 |
#' @return Character string describing `NA` occurrence. |
|
564 |
#' @keywords internal |
|
565 |
var_missings_info <- function(x) { |
|
566 | ! |
sprintf("%s [%s%%]", sum(is.na(x)), round(mean(is.na(x) * 100), 2)) |
567 |
} |
|
568 | ||
569 |
#' Summarizes variable |
|
570 |
#' |
|
571 |
#' Creates html summary with statistics relevant to data type. For numeric values it returns central |
|
572 |
#' tendency measures, for factor returns level counts, for Date date range, for other just |
|
573 |
#' number of levels. |
|
574 |
#' |
|
575 |
#' @param x vector of any type |
|
576 |
#' @param numeric_as_factor `logical` should the numeric variable be treated as a factor |
|
577 |
#' @param dt_rows `numeric` current/latest `DT` page length |
|
578 |
#' @param outlier_definition If 0 no outliers are removed, otherwise |
|
579 |
#' outliers (those more than `outlier_definition*IQR below/above Q1/Q3` be removed) |
|
580 |
#' @return text with simple statistics. |
|
581 |
#' @keywords internal |
|
582 |
var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) { |
|
583 | ! |
if (is.null(dt_rows)) { |
584 | ! |
dt_rows <- 10 |
585 |
} |
|
586 | ! |
if (is.numeric(x) && !numeric_as_factor) { |
587 | ! |
req(!any(is.infinite(x))) |
588 | ||
589 | ! |
x <- remove_outliers_from(x, outlier_definition) |
590 | ||
591 | ! |
qvals <- round(stats::quantile(x, na.rm = TRUE, probs = c(0.25, 0.5, 0.75), type = 2), 2) |
592 |
# classical central tendency measures |
|
593 | ||
594 | ! |
summary <- |
595 | ! |
data.frame( |
596 | ! |
Statistic = c("min", "Q1", "median", "mean", "Q3", "max", "sd", "n"), |
597 | ! |
Value = c( |
598 | ! |
round(min(x, na.rm = TRUE), 2), |
599 | ! |
qvals[1], |
600 | ! |
qvals[2], |
601 | ! |
round(mean(x, na.rm = TRUE), 2), |
602 | ! |
qvals[3], |
603 | ! |
round(max(x, na.rm = TRUE), 2), |
604 | ! |
round(stats::sd(x, na.rm = TRUE), 2), |
605 | ! |
length(x[!is.na(x)]) |
606 |
) |
|
607 |
) |
|
608 | ||
609 | ! |
DT::datatable(summary, rownames = FALSE, options = list(dom = "<t>", pageLength = dt_rows)) |
610 | ! |
} else if (is.factor(x) || is.character(x) || (is.numeric(x) && numeric_as_factor) || is.logical(x)) { |
611 |
# make sure factor is ordered numeric |
|
612 | ! |
if (is.numeric(x)) { |
613 | ! |
x <- factor(x, levels = sort(unique(x))) |
614 |
} |
|
615 | ||
616 | ! |
level_counts <- table(x) |
617 | ! |
max_levels_signif <- nchar(level_counts) |
618 | ||
619 | ! |
if (!all(is.na(x))) { |
620 | ! |
levels <- names(level_counts) |
621 | ! |
counts <- sprintf( |
622 | ! |
"%s [%.2f%%]", |
623 | ! |
format(level_counts, width = max_levels_signif), prop.table(level_counts) * 100 |
624 |
) |
|
625 |
} else { |
|
626 | ! |
levels <- character(0) |
627 | ! |
counts <- numeric(0) |
628 |
} |
|
629 | ||
630 | ! |
summary <- data.frame( |
631 | ! |
Level = levels, |
632 | ! |
Count = counts, |
633 | ! |
stringsAsFactors = FALSE |
634 |
) |
|
635 | ||
636 |
# sort the dataset in decreasing order of counts (needed as character variables default to alphabetical) |
|
637 | ! |
summary <- summary[order(summary$Count, decreasing = TRUE), ] |
638 | ||
639 | ! |
dom_opts <- if (nrow(summary) <= 10) { |
640 | ! |
"<t>" |
641 |
} else { |
|
642 | ! |
"<lf<t>ip>" |
643 |
} |
|
644 | ! |
DT::datatable(summary, rownames = FALSE, options = list(dom = dom_opts, pageLength = dt_rows)) |
645 | ! |
} else if (inherits(x, "Date") || inherits(x, "POSIXct") || inherits(x, "POSIXlt")) { |
646 | ! |
summary <- |
647 | ! |
data.frame( |
648 | ! |
Statistic = c("min", "median", "max"), |
649 | ! |
Value = c( |
650 | ! |
min(x, na.rm = TRUE), |
651 | ! |
stats::median(x, na.rm = TRUE), |
652 | ! |
max(x, na.rm = TRUE) |
653 |
) |
|
654 |
) |
|
655 | ! |
DT::datatable(summary, rownames = FALSE, options = list(dom = "<t>", pageLength = dt_rows)) |
656 |
} else { |
|
657 | ! |
NULL |
658 |
} |
|
659 |
} |
|
660 | ||
661 |
#' Plot variable |
|
662 |
#' |
|
663 |
#' Creates summary plot with statistics relevant to data type. |
|
664 |
#' |
|
665 |
#' @inheritParams shared_params |
|
666 |
#' @param var vector of any type to be plotted. For numeric variables it produces histogram with |
|
667 |
#' density line, for factors it creates frequency plot |
|
668 |
#' @param var_lab text describing selected variable to be displayed on the plot |
|
669 |
#' @param wrap_character (`numeric`) number of characters at which to wrap text values of `var` |
|
670 |
#' @param numeric_as_factor (`logical`) should the numeric variable be treated as a factor |
|
671 |
#' @param display_density (`logical`) should density estimation be displayed for numeric values |
|
672 |
#' @param remove_NA_hist (`logical`) should `NA` values be removed for histogram of factor like variables |
|
673 |
#' @param outlier_definition if 0 no outliers are removed, otherwise |
|
674 |
#' outliers (those more than outlier_definition*IQR below/above Q1/Q3 be removed) |
|
675 |
#' @param records_for_factor (`numeric`) if the number of factor levels is >= than this value then |
|
676 |
#' a graph of the factors isn't shown, only a list of values |
|
677 |
#' |
|
678 |
#' @return plot |
|
679 |
#' @keywords internal |
|
680 |
plot_var_summary <- function(var, |
|
681 |
var_lab, |
|
682 |
wrap_character = NULL, |
|
683 |
numeric_as_factor, |
|
684 |
display_density = is.numeric(var), |
|
685 |
remove_NA_hist = FALSE, # nolint: object_name. |
|
686 |
outlier_definition, |
|
687 |
records_for_factor, |
|
688 |
ggplot2_args) { |
|
689 | ! |
checkmate::assert_character(var_lab) |
690 | ! |
checkmate::assert_numeric(wrap_character, null.ok = TRUE) |
691 | ! |
checkmate::assert_flag(numeric_as_factor) |
692 | ! |
checkmate::assert_flag(display_density) |
693 | ! |
checkmate::assert_logical(remove_NA_hist, null.ok = TRUE) |
694 | ! |
checkmate::assert_number(outlier_definition, lower = 0, finite = TRUE) |
695 | ! |
checkmate::assert_integerish(records_for_factor, lower = 0, len = 1, any.missing = FALSE) |
696 | ! |
checkmate::assert_class(ggplot2_args, "ggplot2_args") |
697 | ||
698 | ! |
grid::grid.newpage() |
699 | ||
700 | ! |
plot_main <- if (is.factor(var) || is.character(var) || is.logical(var)) { |
701 | ! |
groups <- unique(as.character(var)) |
702 | ! |
len_groups <- length(groups) |
703 | ! |
if (len_groups >= records_for_factor) { |
704 | ! |
grid::textGrob( |
705 | ! |
sprintf( |
706 | ! |
"%s unique values\n%s:\n %s\n ...\n %s", |
707 | ! |
len_groups, |
708 | ! |
var_lab, |
709 | ! |
paste(utils::head(groups), collapse = ",\n "), |
710 | ! |
paste(utils::tail(groups), collapse = ",\n ") |
711 |
), |
|
712 | ! |
x = grid::unit(1, "line"), |
713 | ! |
y = grid::unit(1, "npc") - grid::unit(1, "line"), |
714 | ! |
just = c("left", "top") |
715 |
) |
|
716 |
} else { |
|
717 | ! |
if (!is.null(wrap_character)) { |
718 | ! |
var <- stringr::str_wrap(var, width = wrap_character) |
719 |
} |
|
720 | ! |
var <- if (isTRUE(remove_NA_hist)) as.vector(stats::na.omit(var)) else var |
721 | ! |
ggplot(data.frame(var), aes(x = forcats::fct_infreq(as.factor(var)))) + |
722 | ! |
geom_bar( |
723 | ! |
stat = "count", aes(fill = ifelse(is.na(var), "withcolor", "")), show.legend = FALSE |
724 |
) + |
|
725 | ! |
scale_fill_manual(values = c("gray50", "tan")) |
726 |
} |
|
727 | ! |
} else if (is.numeric(var)) { |
728 | ! |
validate(need(any(!is.na(var)), "No data left to visualize.")) |
729 | ||
730 |
# Filter out NA |
|
731 | ! |
var <- var[which(!is.na(var))] |
732 | ||
733 | ! |
validate(need(!any(is.infinite(var)), "Cannot display graph when data includes infinite values")) |
734 | ||
735 | ! |
if (numeric_as_factor) { |
736 | ! |
var <- factor(var) |
737 | ! |
ggplot(NULL, aes(x = var)) + |
738 | ! |
geom_histogram(stat = "count") |
739 |
} else { |
|
740 |
# remove outliers |
|
741 | ! |
if (outlier_definition != 0) { |
742 | ! |
number_records <- length(var) |
743 | ! |
var <- remove_outliers_from(var, outlier_definition) |
744 | ! |
number_outliers <- number_records - length(var) |
745 | ! |
outlier_text <- paste0( |
746 | ! |
number_outliers, " outliers (", |
747 | ! |
round(number_outliers / number_records * 100, 2), |
748 | ! |
"% of non-missing records) not shown" |
749 |
) |
|
750 | ! |
validate(need( |
751 | ! |
length(var) > 1, |
752 | ! |
"At least two data points must remain after removing outliers for this graph to be displayed" |
753 |
)) |
|
754 |
} |
|
755 |
## histogram |
|
756 | ! |
binwidth <- get_bin_width(var) |
757 | ! |
p <- ggplot(data = data.frame(var = var), aes(x = var, y = after_stat(count))) + |
758 | ! |
geom_histogram(binwidth = binwidth) + |
759 | ! |
scale_y_continuous( |
760 | ! |
sec.axis = sec_axis( |
761 | ! |
trans = ~ . / nrow(data.frame(var = var)), |
762 | ! |
labels = scales::percent, |
763 | ! |
name = "proportion (in %)" |
764 |
) |
|
765 |
) |
|
766 | ||
767 | ! |
if (display_density) { |
768 | ! |
p <- p + geom_density(aes(y = after_stat(count * binwidth))) |
769 |
} |
|
770 | ||
771 | ! |
if (outlier_definition != 0) { |
772 | ! |
p <- p + annotate( |
773 | ! |
geom = "text", |
774 | ! |
label = outlier_text, |
775 | ! |
x = Inf, y = Inf, |
776 | ! |
hjust = 1.02, vjust = 1.2, |
777 | ! |
color = "black", |
778 |
# explicitly modify geom text size according |
|
779 | ! |
size = ggplot2_args[["theme"]][["text"]][["size"]] / 3.5 |
780 |
) |
|
781 |
} |
|
782 | ! |
p |
783 |
} |
|
784 | ! |
} else if (inherits(var, "Date") || inherits(var, "POSIXct") || inherits(var, "POSIXlt")) { |
785 | ! |
var_num <- as.numeric(var) |
786 | ! |
binwidth <- get_bin_width(var_num, 1) |
787 | ! |
p <- ggplot(data = data.frame(var = var), aes(x = var, y = after_stat(count))) + |
788 | ! |
geom_histogram(binwidth = binwidth) |
789 |
} else { |
|
790 | ! |
grid::textGrob( |
791 | ! |
paste(strwrap( |
792 | ! |
utils::capture.output(utils::str(var)), |
793 | ! |
width = .9 * grid::convertWidth(grid::unit(1, "npc"), "char", TRUE) |
794 | ! |
), collapse = "\n"), |
795 | ! |
x = grid::unit(1, "line"), y = grid::unit(1, "npc") - grid::unit(1, "line"), just = c("left", "top") |
796 |
) |
|
797 |
} |
|
798 | ||
799 | ! |
dev_ggplot2_args <- teal.widgets::ggplot2_args( |
800 | ! |
labs = list(x = var_lab) |
801 |
) |
|
802 |
### |
|
803 | ! |
all_ggplot2_args <- teal.widgets::resolve_ggplot2_args( |
804 | ! |
ggplot2_args, |
805 | ! |
module_plot = dev_ggplot2_args |
806 |
) |
|
807 | ||
808 | ! |
if (is.ggplot(plot_main)) { |
809 | ! |
if (is.numeric(var) && !numeric_as_factor) { |
810 |
# numeric not as factor |
|
811 | ! |
plot_main <- plot_main + |
812 | ! |
theme_light() + |
813 | ! |
list( |
814 | ! |
labs = do.call("labs", all_ggplot2_args$labs), |
815 | ! |
theme = do.call("theme", all_ggplot2_args$theme) |
816 |
) |
|
817 |
} else { |
|
818 |
# factor low number of levels OR numeric as factor OR Date |
|
819 | ! |
plot_main <- plot_main + |
820 | ! |
theme_light() + |
821 | ! |
list( |
822 | ! |
labs = do.call("labs", all_ggplot2_args$labs), |
823 | ! |
theme = do.call("theme", all_ggplot2_args$theme) |
824 |
) |
|
825 |
} |
|
826 | ! |
plot_main <- ggplotGrob(plot_main) |
827 |
} |
|
828 | ||
829 | ! |
grid::grid.draw(plot_main) |
830 | ! |
plot_main |
831 |
} |
|
832 | ||
833 |
is_num_var_short <- function(.unique_records_for_factor, input, data_for_analysis) { |
|
834 | ! |
length(unique(data_for_analysis()$data)) < .unique_records_for_factor && !is.null(input$numeric_as_factor) |
835 |
} |
|
836 | ||
837 |
#' Validates the variable browser inputs |
|
838 |
#' |
|
839 |
#' @param input (`session$input`) the `shiny` session input |
|
840 |
#' @param plot_var (`list`) list of a data frame and an array of variable names |
|
841 |
#' @param data (`teal_data`) the datasets passed to the module |
|
842 |
#' |
|
843 |
#' @returns `logical` TRUE if validations pass; a `shiny` validation error otherwise |
|
844 |
#' @keywords internal |
|
845 |
validate_input <- function(input, plot_var, data) { |
|
846 | ! |
reactive({ |
847 | ! |
dataset_name <- req(input$tabset_panel) |
848 | ! |
varname <- plot_var$variable[[dataset_name]] |
849 | ||
850 | ! |
validate(need(dataset_name, "No data selected")) |
851 | ! |
validate(need(varname, "No variable selected")) |
852 | ! |
df <- data()[[dataset_name]] |
853 | ! |
teal::validate_has_data(df, 1) |
854 | ! |
teal::validate_has_variable(varname = varname, data = df, "Variable not available") |
855 | ||
856 | ! |
TRUE |
857 |
}) |
|
858 |
} |
|
859 | ||
860 |
get_plotted_data <- function(input, plot_var, data) { |
|
861 | ! |
dataset_name <- input$tabset_panel |
862 | ! |
varname <- plot_var$variable[[dataset_name]] |
863 | ! |
df <- data()[[dataset_name]] |
864 | ||
865 | ! |
var_description <- teal.data::col_labels(df)[[varname]] |
866 | ! |
list(data = df[[varname]], var_description = var_description) |
867 |
} |
|
868 | ||
869 |
#' Renders the left-hand side `tabset` panel of the module |
|
870 |
#' |
|
871 |
#' @param datanames (`character`) the name of the dataset |
|
872 |
#' @param parent_dataname (`character`) the name of a parent `dataname` to filter out variables from |
|
873 |
#' @param data (`teal_data`) the object containing all datasets |
|
874 |
#' @param input (`session$input`) the `shiny` session input |
|
875 |
#' @param output (`session$output`) the `shiny` session output |
|
876 |
#' @param columns_names (`environment`) the environment containing bindings for each dataset |
|
877 |
#' @param plot_var (`list`) the list containing the currently selected dataset (tab) and its column names |
|
878 |
#' @keywords internal |
|
879 |
render_tabset_panel_content <- function(datanames, parent_dataname, output, data, input, columns_names, plot_var) { |
|
880 | ! |
lapply(datanames, render_single_tab, |
881 | ! |
input = input, |
882 | ! |
output = output, |
883 | ! |
data = data, |
884 | ! |
parent_dataname = parent_dataname, |
885 | ! |
columns_names = columns_names, |
886 | ! |
plot_var = plot_var |
887 |
) |
|
888 |
} |
|
889 | ||
890 |
#' Renders a single tab in the left-hand side tabset panel |
|
891 |
#' |
|
892 |
#' Renders a single tab in the left-hand side tabset panel. The rendered tab contains |
|
893 |
#' information about one dataset out of many presented in the module. |
|
894 |
#' |
|
895 |
#' @param dataset_name (`character`) the name of the dataset contained in the rendered tab |
|
896 |
#' @param parent_dataname (`character`) the name of a parent `dataname` to filter out variables from |
|
897 |
#' @inheritParams render_tabset_panel_content |
|
898 |
#' @keywords internal |
|
899 |
render_single_tab <- function(dataset_name, parent_dataname, output, data, input, columns_names, plot_var) { |
|
900 | ! |
render_tab_header(dataset_name, output, data) |
901 | ||
902 | ! |
render_tab_table( |
903 | ! |
dataset_name = dataset_name, |
904 | ! |
parent_dataname = parent_dataname, |
905 | ! |
output = output, |
906 | ! |
data = data, |
907 | ! |
input = input, |
908 | ! |
columns_names = columns_names, |
909 | ! |
plot_var = plot_var |
910 |
) |
|
911 |
} |
|
912 | ||
913 |
#' Renders the text headlining a single tab in the left-hand side tabset panel |
|
914 |
#' |
|
915 |
#' @param dataset_name (`character`) the name of the dataset of the tab |
|
916 |
#' @inheritParams render_tabset_panel_content |
|
917 |
#' @keywords internal |
|
918 |
render_tab_header <- function(dataset_name, output, data) { |
|
919 | ! |
dataset_ui_id <- paste0("dataset_summary_", dataset_name) |
920 | ! |
output[[dataset_ui_id]] <- renderText({ |
921 | ! |
df <- data()[[dataset_name]] |
922 | ! |
join_keys <- teal.data::join_keys(data()) |
923 | ! |
if (!is.null(join_keys)) { |
924 | ! |
key <- teal.data::join_keys(data())[dataset_name, dataset_name] |
925 |
} else { |
|
926 | ! |
key <- NULL |
927 |
} |
|
928 | ! |
sprintf( |
929 | ! |
"Dataset with %s unique key rows and %s variables", |
930 | ! |
nrow(unique(`if`(length(key) > 0, df[, key, drop = FALSE], df))), |
931 | ! |
ncol(df) |
932 |
) |
|
933 |
}) |
|
934 |
} |
|
935 | ||
936 |
#' Renders the table for a single dataset in the left-hand side tabset panel |
|
937 |
#' |
|
938 |
#' The table contains column names, column labels, |
|
939 |
#' small summary about NA values and `sparkline` (if appropriate). |
|
940 |
#' |
|
941 |
#' @param dataset_name (`character`) the name of the dataset |
|
942 |
#' @param parent_dataname (`character`) the name of a parent `dataname` to filter out variables from |
|
943 |
#' @inheritParams render_tabset_panel_content |
|
944 |
#' @keywords internal |
|
945 |
render_tab_table <- function(dataset_name, parent_dataname, output, data, input, columns_names, plot_var) { |
|
946 | ! |
table_ui_id <- paste0("variable_browser_", dataset_name) |
947 | ||
948 | ! |
output[[table_ui_id]] <- DT::renderDataTable({ |
949 | ! |
df <- data()[[dataset_name]] |
950 | ||
951 | ! |
get_vars_df <- function(input, dataset_name, parent_name, data) { |
952 | ! |
data_cols <- colnames(df) |
953 | ! |
if (isTRUE(input$show_parent_vars)) { |
954 | ! |
data_cols |
955 | ! |
} else if (dataset_name != parent_name && parent_name %in% names(data)) { |
956 | ! |
setdiff(data_cols, colnames(data()[[parent_name]])) |
957 |
} else { |
|
958 | ! |
data_cols |
959 |
} |
|
960 |
} |
|
961 | ||
962 | ! |
if (length(parent_dataname) > 0) { |
963 | ! |
df_vars <- get_vars_df(input, dataset_name, parent_dataname, data) |
964 | ! |
df <- df[df_vars] |
965 |
} |
|
966 | ||
967 | ! |
if (is.null(df) || ncol(df) == 0) { |
968 | ! |
columns_names[[dataset_name]] <- character(0) |
969 | ! |
df_output <- data.frame( |
970 | ! |
Type = character(0), |
971 | ! |
Variable = character(0), |
972 | ! |
Label = character(0), |
973 | ! |
Missings = character(0), |
974 | ! |
Sparklines = character(0), |
975 | ! |
stringsAsFactors = FALSE |
976 |
) |
|
977 |
} else { |
|
978 |
# extract data variable labels |
|
979 | ! |
labels <- teal.data::col_labels(df) |
980 | ||
981 | ! |
columns_names[[dataset_name]] <- names(labels) |
982 | ||
983 |
# calculate number of missing values |
|
984 | ! |
missings <- vapply( |
985 | ! |
df, |
986 | ! |
var_missings_info, |
987 | ! |
FUN.VALUE = character(1), |
988 | ! |
USE.NAMES = FALSE |
989 |
) |
|
990 | ||
991 |
# get icons proper for the data types |
|
992 | ! |
icons <- vapply(df, function(x) class(x)[1L], character(1L)) |
993 | ||
994 | ! |
join_keys <- teal.data::join_keys(data()) |
995 | ! |
if (!is.null(join_keys)) { |
996 | ! |
icons[intersect(join_keys[dataset_name, dataset_name], colnames(df))] <- "primary_key" |
997 |
} |
|
998 | ! |
icons <- variable_type_icons(icons) |
999 | ||
1000 |
# generate sparklines |
|
1001 | ! |
sparklines_html <- vapply( |
1002 | ! |
df, |
1003 | ! |
create_sparklines, |
1004 | ! |
FUN.VALUE = character(1), |
1005 | ! |
USE.NAMES = FALSE |
1006 |
) |
|
1007 | ||
1008 | ! |
df_output <- data.frame( |
1009 | ! |
Type = icons, |
1010 | ! |
Variable = names(labels), |
1011 | ! |
Label = labels, |
1012 | ! |
Missings = missings, |
1013 | ! |
Sparklines = sparklines_html, |
1014 | ! |
stringsAsFactors = FALSE |
1015 |
) |
|
1016 |
} |
|
1017 | ||
1018 |
# Select row 1 as default / fallback |
|
1019 | ! |
selected_ix <- 1 |
1020 |
# Define starting page index (base-0 index of the first item on page |
|
1021 |
# note: in many cases it's not the item itself |
|
1022 | ! |
selected_page_ix <- 0 |
1023 | ||
1024 |
# Retrieve current selected variable if any |
|
1025 | ! |
isolated_variable <- isolate(plot_var$variable[[dataset_name]]) |
1026 | ||
1027 | ! |
if (!is.null(isolated_variable)) { |
1028 | ! |
index <- which(columns_names[[dataset_name]] == isolated_variable)[1] |
1029 | ! |
if (!is.null(index) && !is.na(index) && length(index) > 0) selected_ix <- index |
1030 |
} |
|
1031 | ||
1032 |
# Retrieve the index of the first item of the current page |
|
1033 |
# it works with varying number of entries on the page (10, 25, ...) |
|
1034 | ! |
table_id_sel <- paste0("variable_browser_", dataset_name, "_state") |
1035 | ! |
dt_state <- isolate(input[[table_id_sel]]) |
1036 | ! |
if (selected_ix != 1 && !is.null(dt_state)) { |
1037 | ! |
selected_page_ix <- floor(selected_ix / dt_state$length) * dt_state$length |
1038 |
} |
|
1039 | ||
1040 | ! |
DT::datatable( |
1041 | ! |
df_output, |
1042 | ! |
escape = FALSE, |
1043 | ! |
rownames = FALSE, |
1044 | ! |
selection = list(mode = "single", target = "row", selected = selected_ix), |
1045 | ! |
options = list( |
1046 | ! |
fnDrawCallback = htmlwidgets::JS("function() { HTMLWidgets.staticRender(); }"), |
1047 | ! |
pageLength = input[[paste0(table_ui_id, "_rows")]], |
1048 | ! |
displayStart = selected_page_ix |
1049 |
) |
|
1050 |
) |
|
1051 |
}) |
|
1052 |
} |
|
1053 | ||
1054 |
#' Creates observers updating the currently selected column |
|
1055 |
#' |
|
1056 |
#' The created observers update the column currently selected in the left-hand side |
|
1057 |
#' tabset panel. |
|
1058 |
#' |
|
1059 |
#' @note |
|
1060 |
#' Creates an observer for each dataset (each tab in the tabset panel). |
|
1061 |
#' |
|
1062 |
#' @inheritParams render_tabset_panel_content |
|
1063 |
#' @keywords internal |
|
1064 |
establish_updating_selection <- function(datanames, input, plot_var, columns_names) { |
|
1065 | ! |
lapply(datanames, function(dataset_name) { |
1066 | ! |
table_ui_id <- paste0("variable_browser_", dataset_name) |
1067 | ! |
table_id_sel <- paste0(table_ui_id, "_rows_selected") |
1068 | ! |
observeEvent(input[[table_id_sel]], { |
1069 | ! |
plot_var$data <- dataset_name |
1070 | ! |
plot_var$variable[[dataset_name]] <- columns_names[[dataset_name]][input[[table_id_sel]]] |
1071 |
}) |
|
1072 |
}) |
|
1073 |
} |
|
1074 | ||
1075 |
get_bin_width <- function(x_vec, scaling_factor = 2) { |
|
1076 | ! |
x_vec <- x_vec[!is.na(x_vec)] |
1077 | ! |
qntls <- stats::quantile(x_vec, probs = c(0.1, 0.25, 0.75, 0.9), type = 2) |
1078 | ! |
iqr <- qntls[3] - qntls[2] |
1079 | ! |
binwidth <- max(scaling_factor * iqr / length(x_vec) ^ (1 / 3), sqrt(qntls[4] - qntls[1])) # styler: off |
1080 | ! |
binwidth <- ifelse(binwidth == 0, 1, binwidth) |
1081 |
# to ensure at least two bins when variable span is very small |
|
1082 | ! |
x_span <- diff(range(x_vec)) |
1083 | ! |
if (isTRUE(x_span / binwidth >= 2)) binwidth else x_span / 2 |
1084 |
} |
|
1085 | ||
1086 |
#' Removes the outlier observation from an array |
|
1087 |
#' |
|
1088 |
#' @param var (`numeric`) a numeric vector |
|
1089 |
#' @param outlier_definition (`numeric`) if `0` then no outliers are removed, otherwise |
|
1090 |
#' outliers (those more than `outlier_definition*IQR below/above Q1/Q3`) are removed |
|
1091 |
#' @returns (`numeric`) vector without the outlier values |
|
1092 |
#' @keywords internal |
|
1093 |
remove_outliers_from <- function(var, outlier_definition) { |
|
1094 | 3x |
if (outlier_definition == 0) { |
1095 | 1x |
return(var) |
1096 |
} |
|
1097 | 2x |
q1_q3 <- stats::quantile(var, probs = c(0.25, 0.75), type = 2, na.rm = TRUE) |
1098 | 2x |
iqr <- q1_q3[2] - q1_q3[1] |
1099 | 2x |
var[var >= q1_q3[1] - outlier_definition * iqr & var <= q1_q3[2] + outlier_definition * iqr] |
1100 |
} |
|
1101 | ||
1102 | ||
1103 |
# sparklines ---- |
|
1104 | ||
1105 |
#' S3 generic for `sparkline` widget HTML |
|
1106 |
#' |
|
1107 |
#' Generates the `sparkline` HTML code corresponding to the input array. |
|
1108 |
#' For numeric variables creates a box plot, for character and factors - bar plot. |
|
1109 |
#' Produces an empty string for variables of other types. |
|
1110 |
#' |
|
1111 |
#' @param arr vector of any type and length |
|
1112 |
#' @param width `numeric` the width of the `sparkline` widget (pixels) |
|
1113 |
#' @param bar_spacing `numeric` the spacing between the bars (in pixels) |
|
1114 |
#' @param bar_width `numeric` the width of the bars (in pixels) |
|
1115 |
#' @param ... `list` additional options passed to bar plots of `jquery.sparkline`; |
|
1116 |
#' see [`jquery.sparkline docs`](https://omnipotent.net/jquery.sparkline/#common) |
|
1117 |
#' |
|
1118 |
#' @return Character string containing HTML code of the `sparkline` HTML widget. |
|
1119 |
#' @keywords internal |
|
1120 |
create_sparklines <- function(arr, width = 150, ...) { |
|
1121 | ! |
if (all(is.null(arr))) { |
1122 | ! |
return("") |
1123 |
} |
|
1124 | ! |
UseMethod("create_sparklines") |
1125 |
} |
|
1126 | ||
1127 |
#' @rdname create_sparklines |
|
1128 |
#' @keywords internal |
|
1129 |
#' @export |
|
1130 |
create_sparklines.logical <- function(arr, ...) { |
|
1131 | ! |
create_sparklines(as.factor(arr)) |
1132 |
} |
|
1133 | ||
1134 |
#' @rdname create_sparklines |
|
1135 |
#' @keywords internal |
|
1136 |
#' @export |
|
1137 |
create_sparklines.numeric <- function(arr, width = 150, ...) { |
|
1138 | ! |
if (any(is.infinite(arr))) { |
1139 | ! |
return(as.character(tags$code("infinite values", class = "text-blue"))) |
1140 |
} |
|
1141 | ! |
if (length(arr) > 100000) { |
1142 | ! |
return(as.character(tags$code("Too many rows (>100000)", class = "text-blue"))) |
1143 |
} |
|
1144 | ||
1145 | ! |
arr <- arr[!is.na(arr)] |
1146 | ! |
sparkline::spk_chr(unname(arr), type = "box", width = width, ...) |
1147 |
} |
|
1148 | ||
1149 |
#' @rdname create_sparklines |
|
1150 |
#' @keywords internal |
|
1151 |
#' @export |
|
1152 |
create_sparklines.character <- function(arr, ...) { |
|
1153 | ! |
return(create_sparklines(as.factor(arr))) |
1154 |
} |
|
1155 | ||
1156 | ||
1157 |
#' @rdname create_sparklines |
|
1158 |
#' @keywords internal |
|
1159 |
#' @export |
|
1160 |
create_sparklines.factor <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) { |
|
1161 | ! |
decreasing_order <- TRUE |
1162 | ||
1163 | ! |
counts <- table(arr) |
1164 | ! |
if (length(counts) >= 100) { |
1165 | ! |
return(as.character(tags$code("> 99 levels", class = "text-blue"))) |
1166 | ! |
} else if (length(counts) == 0) { |
1167 | ! |
return(as.character(tags$code("no levels", class = "text-blue"))) |
1168 | ! |
} else if (length(counts) == 1) { |
1169 | ! |
return(as.character(tags$code("one level", class = "text-blue"))) |
1170 |
} |
|
1171 | ||
1172 |
# Summarize the occurences of different levels |
|
1173 |
# and get the maximum and minimum number of occurences |
|
1174 |
# This is needed for the sparkline to correctly display the bar plots |
|
1175 |
# Otherwise they are cropped |
|
1176 | ! |
counts <- sort(counts, decreasing = decreasing_order, method = "radix") |
1177 | ! |
max_value <- if (decreasing_order) counts[1] else counts[length[counts]] |
1178 | ! |
max_value <- unname(max_value) |
1179 | ||
1180 | ! |
sparkline::spk_chr( |
1181 | ! |
unname(counts), |
1182 | ! |
type = "bar", |
1183 | ! |
chartRangeMin = 0, |
1184 | ! |
chartRangeMax = max_value, |
1185 | ! |
width = width, |
1186 | ! |
barWidth = bar_width, |
1187 | ! |
barSpacing = bar_spacing, |
1188 | ! |
tooltipFormatter = custom_sparkline_formatter(names(counts), as.vector(counts)) |
1189 |
) |
|
1190 |
} |
|
1191 | ||
1192 |
#' @rdname create_sparklines |
|
1193 |
#' @keywords internal |
|
1194 |
#' @export |
|
1195 |
create_sparklines.Date <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) { |
|
1196 | ! |
arr_num <- as.numeric(arr) |
1197 | ! |
arr_num <- sort(arr_num, decreasing = FALSE, method = "radix") |
1198 | ! |
binwidth <- get_bin_width(arr_num, 1) |
1199 | ! |
bins <- floor(diff(range(arr_num)) / binwidth) + 1 |
1200 | ! |
if (all(is.na(bins))) { |
1201 | ! |
return(as.character(tags$code("only NA", class = "text-blue"))) |
1202 | ! |
} else if (bins == 1) { |
1203 | ! |
return(as.character(tags$code("one date", class = "text-blue"))) |
1204 |
} |
|
1205 | ! |
counts <- as.vector(unname(base::table(cut(arr_num, breaks = bins)))) |
1206 | ! |
max_value <- max(counts) |
1207 | ||
1208 | ! |
start_bins <- as.integer(seq(1, length(arr_num), length.out = bins)) |
1209 | ! |
labels_start <- as.character(as.Date(arr_num[start_bins], origin = as.Date("1970-01-01"))) |
1210 | ! |
labels <- paste("Start:", labels_start) |
1211 | ||
1212 | ! |
sparkline::spk_chr( |
1213 | ! |
unname(counts), |
1214 | ! |
type = "bar", |
1215 | ! |
chartRangeMin = 0, |
1216 | ! |
chartRangeMax = max_value, |
1217 | ! |
width = width, |
1218 | ! |
barWidth = bar_width, |
1219 | ! |
barSpacing = bar_spacing, |
1220 | ! |
tooltipFormatter = custom_sparkline_formatter(labels, counts) |
1221 |
) |
|
1222 |
} |
|
1223 | ||
1224 |
#' @rdname create_sparklines |
|
1225 |
#' @keywords internal |
|
1226 |
#' @export |
|
1227 |
create_sparklines.POSIXct <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) { |
|
1228 | ! |
arr_num <- as.numeric(arr) |
1229 | ! |
arr_num <- sort(arr_num, decreasing = FALSE, method = "radix") |
1230 | ! |
binwidth <- get_bin_width(arr_num, 1) |
1231 | ! |
bins <- floor(diff(range(arr_num)) / binwidth) + 1 |
1232 | ! |
if (all(is.na(bins))) { |
1233 | ! |
return(as.character(tags$code("only NA", class = "text-blue"))) |
1234 | ! |
} else if (bins == 1) { |
1235 | ! |
return(as.character(tags$code("one date-time", class = "text-blue"))) |
1236 |
} |
|
1237 | ! |
counts <- as.vector(unname(base::table(cut(arr_num, breaks = bins)))) |
1238 | ! |
max_value <- max(counts) |
1239 | ||
1240 | ! |
start_bins <- as.integer(seq(1, length(arr_num), length.out = bins)) |
1241 | ! |
labels_start <- as.character(format(as.POSIXct(arr_num[start_bins], origin = as.Date("1970-01-01")), "%Y-%m-%d")) |
1242 | ! |
labels <- paste("Start:", labels_start) |
1243 | ||
1244 | ! |
sparkline::spk_chr( |
1245 | ! |
unname(counts), |
1246 | ! |
type = "bar", |
1247 | ! |
chartRangeMin = 0, |
1248 | ! |
chartRangeMax = max_value, |
1249 | ! |
width = width, |
1250 | ! |
barWidth = bar_width, |
1251 | ! |
barSpacing = bar_spacing, |
1252 | ! |
tooltipFormatter = custom_sparkline_formatter(labels, counts) |
1253 |
) |
|
1254 |
} |
|
1255 | ||
1256 |
#' @rdname create_sparklines |
|
1257 |
#' @keywords internal |
|
1258 |
#' @export |
|
1259 |
create_sparklines.POSIXlt <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) { |
|
1260 | ! |
arr_num <- as.numeric(arr) |
1261 | ! |
arr_num <- sort(arr_num, decreasing = FALSE, method = "radix") |
1262 | ! |
binwidth <- get_bin_width(arr_num, 1) |
1263 | ! |
bins <- floor(diff(range(arr_num)) / binwidth) + 1 |
1264 | ! |
if (all(is.na(bins))) { |
1265 | ! |
return(as.character(tags$code("only NA", class = "text-blue"))) |
1266 | ! |
} else if (bins == 1) { |
1267 | ! |
return(as.character(tags$code("one date-time", class = "text-blue"))) |
1268 |
} |
|
1269 | ! |
counts <- as.vector(unname(base::table(cut(arr_num, breaks = bins)))) |
1270 | ! |
max_value <- max(counts) |
1271 | ||
1272 | ! |
start_bins <- as.integer(seq(1, length(arr_num), length.out = bins)) |
1273 | ! |
labels_start <- as.character(format(as.POSIXct(arr_num[start_bins], origin = as.Date("1970-01-01")), "%Y-%m-%d")) |
1274 | ! |
labels <- paste("Start:", labels_start) |
1275 | ||
1276 | ! |
sparkline::spk_chr( |
1277 | ! |
unname(counts), |
1278 | ! |
type = "bar", |
1279 | ! |
chartRangeMin = 0, |
1280 | ! |
chartRangeMax = max_value, |
1281 | ! |
width = width, |
1282 | ! |
barWidth = bar_width, |
1283 | ! |
barSpacing = bar_spacing, |
1284 | ! |
tooltipFormatter = custom_sparkline_formatter(labels, counts) |
1285 |
) |
|
1286 |
} |
|
1287 | ||
1288 |
#' @rdname create_sparklines |
|
1289 |
#' @keywords internal |
|
1290 |
#' @export |
|
1291 |
create_sparklines.default <- function(arr, width = 150, ...) { |
|
1292 | ! |
as.character(tags$code("unsupported variable type", class = "text-blue")) |
1293 |
} |
|
1294 | ||
1295 | ||
1296 |
custom_sparkline_formatter <- function(labels, counts) { |
|
1297 | ! |
htmlwidgets::JS( |
1298 | ! |
sprintf( |
1299 | ! |
"function(sparkline, options, field) { |
1300 | ! |
return 'ID: ' + %s[field[0].offset] + '<br>' + 'Count: ' + %s[field[0].offset]; |
1301 |
}", |
|
1302 | ! |
jsonlite::toJSON(labels), |
1303 | ! |
jsonlite::toJSON(counts) |
1304 |
) |
|
1305 |
) |
|
1306 |
} |
1 |
#' `teal` module: Outliers analysis |
|
2 |
#' |
|
3 |
#' Module to analyze and identify outliers using different methods |
|
4 |
#' such as IQR, Z-score, and Percentiles, and offers visualizations including |
|
5 |
#' box plots, density plots, and cumulative distribution plots to help interpret the outliers. |
|
6 |
#' |
|
7 |
#' @inheritParams teal::module |
|
8 |
#' @inheritParams shared_params |
|
9 |
#' |
|
10 |
#' @param outlier_var (`data_extract_spec` or `list` of multiple `data_extract_spec`) |
|
11 |
#' Specifies variable(s) to be analyzed for outliers. |
|
12 |
#' @param categorical_var (`data_extract_spec` or `list` of multiple `data_extract_spec`) optional, |
|
13 |
#' specifies the categorical variable(s) to split the selected outlier variables on. |
|
14 |
#' |
|
15 |
#' @templateVar ggnames "Boxplot","Density Plot","Cumulative Distribution Plot" |
|
16 |
#' @template ggplot2_args_multi |
|
17 |
#' |
|
18 |
#' @inherit shared_params return |
|
19 |
#' |
|
20 |
#' @examplesShinylive |
|
21 |
#' library(teal.modules.general) |
|
22 |
#' interactive <- function() TRUE |
|
23 |
#' {{ next_example }} |
|
24 |
#' @examples |
|
25 |
#' # general data example |
|
26 |
#' data <- teal_data() |
|
27 |
#' data <- within(data, { |
|
28 |
#' CO2 <- CO2 |
|
29 |
#' CO2[["primary_key"]] <- seq_len(nrow(CO2)) |
|
30 |
#' }) |
|
31 |
#' datanames(data) <- "CO2" |
|
32 |
#' join_keys(data) <- join_keys(join_key("CO2", "CO2", "primary_key")) |
|
33 |
#' |
|
34 |
#' vars <- choices_selected(variable_choices(data[["CO2"]], c("Plant", "Type", "Treatment"))) |
|
35 |
#' |
|
36 |
#' app <- init( |
|
37 |
#' data = data, |
|
38 |
#' modules = modules( |
|
39 |
#' tm_outliers( |
|
40 |
#' outlier_var = list( |
|
41 |
#' data_extract_spec( |
|
42 |
#' dataname = "CO2", |
|
43 |
#' select = select_spec( |
|
44 |
#' label = "Select variable:", |
|
45 |
#' choices = variable_choices(data[["CO2"]], c("conc", "uptake")), |
|
46 |
#' selected = "uptake", |
|
47 |
#' multiple = FALSE, |
|
48 |
#' fixed = FALSE |
|
49 |
#' ) |
|
50 |
#' ) |
|
51 |
#' ), |
|
52 |
#' categorical_var = list( |
|
53 |
#' data_extract_spec( |
|
54 |
#' dataname = "CO2", |
|
55 |
#' filter = filter_spec( |
|
56 |
#' vars = vars, |
|
57 |
#' choices = value_choices(data[["CO2"]], vars$selected), |
|
58 |
#' selected = value_choices(data[["CO2"]], vars$selected), |
|
59 |
#' multiple = TRUE |
|
60 |
#' ) |
|
61 |
#' ) |
|
62 |
#' ) |
|
63 |
#' ) |
|
64 |
#' ) |
|
65 |
#' ) |
|
66 |
#' if (interactive()) { |
|
67 |
#' shinyApp(app$ui, app$server) |
|
68 |
#' } |
|
69 |
#' |
|
70 |
#' @examplesShinylive |
|
71 |
#' library(teal.modules.general) |
|
72 |
#' interactive <- function() TRUE |
|
73 |
#' {{ next_example }} |
|
74 |
#' @examples |
|
75 |
#' # CDISC data example |
|
76 |
#' data <- teal_data() |
|
77 |
#' data <- within(data, { |
|
78 |
#' ADSL <- rADSL |
|
79 |
#' }) |
|
80 |
#' datanames(data) <- "ADSL" |
|
81 |
#' join_keys(data) <- default_cdisc_join_keys[datanames(data)] |
|
82 |
#' |
|
83 |
#' fact_vars_adsl <- names(Filter(isTRUE, sapply(data[["ADSL"]], is.factor))) |
|
84 |
#' vars <- choices_selected(variable_choices(data[["ADSL"]], fact_vars_adsl)) |
|
85 |
#' |
|
86 |
#' app <- init( |
|
87 |
#' data = data, |
|
88 |
#' modules = modules( |
|
89 |
#' tm_outliers( |
|
90 |
#' outlier_var = list( |
|
91 |
#' data_extract_spec( |
|
92 |
#' dataname = "ADSL", |
|
93 |
#' select = select_spec( |
|
94 |
#' label = "Select variable:", |
|
95 |
#' choices = variable_choices(data[["ADSL"]], c("AGE", "BMRKR1")), |
|
96 |
#' selected = "AGE", |
|
97 |
#' multiple = FALSE, |
|
98 |
#' fixed = FALSE |
|
99 |
#' ) |
|
100 |
#' ) |
|
101 |
#' ), |
|
102 |
#' categorical_var = list( |
|
103 |
#' data_extract_spec( |
|
104 |
#' dataname = "ADSL", |
|
105 |
#' filter = filter_spec( |
|
106 |
#' vars = vars, |
|
107 |
#' choices = value_choices(data[["ADSL"]], vars$selected), |
|
108 |
#' selected = value_choices(data[["ADSL"]], vars$selected), |
|
109 |
#' multiple = TRUE |
|
110 |
#' ) |
|
111 |
#' ) |
|
112 |
#' ) |
|
113 |
#' ) |
|
114 |
#' ) |
|
115 |
#' ) |
|
116 |
#' if (interactive()) { |
|
117 |
#' shinyApp(app$ui, app$server) |
|
118 |
#' } |
|
119 |
#' |
|
120 |
#' @export |
|
121 |
#' |
|
122 |
tm_outliers <- function(label = "Outliers Module", |
|
123 |
outlier_var, |
|
124 |
categorical_var = NULL, |
|
125 |
ggtheme = c("gray", "bw", "linedraw", "light", "dark", "minimal", "classic", "void"), |
|
126 |
ggplot2_args = teal.widgets::ggplot2_args(), |
|
127 |
plot_height = c(600, 200, 2000), |
|
128 |
plot_width = NULL, |
|
129 |
pre_output = NULL, |
|
130 |
post_output = NULL) { |
|
131 | ! |
message("Initializing tm_outliers") |
132 | ||
133 |
# Normalize the parameters |
|
134 | ! |
if (inherits(outlier_var, "data_extract_spec")) outlier_var <- list(outlier_var) |
135 | ! |
if (inherits(categorical_var, "data_extract_spec")) categorical_var <- list(categorical_var) |
136 | ! |
if (inherits(ggplot2_args, "ggplot2_args")) ggplot2_args <- list(default = ggplot2_args) |
137 | ||
138 |
# Start of assertions |
|
139 | ! |
checkmate::assert_string(label) |
140 | ! |
checkmate::assert_list(outlier_var, types = "data_extract_spec") |
141 | ||
142 | ! |
checkmate::assert_list(categorical_var, types = "data_extract_spec", null.ok = TRUE) |
143 | ! |
if (is.list(categorical_var)) { |
144 | ! |
lapply(categorical_var, function(x) { |
145 | ! |
if (length(x$filter) > 1L) { |
146 | ! |
stop("tm_outliers: categorical_var data_extract_specs may only specify one filter_spec", call. = FALSE) |
147 |
} |
|
148 |
}) |
|
149 |
} |
|
150 | ||
151 | ! |
ggtheme <- match.arg(ggtheme) |
152 | ||
153 | ! |
plot_choices <- c("Boxplot", "Density Plot", "Cumulative Distribution Plot") |
154 | ! |
checkmate::assert_list(ggplot2_args, types = "ggplot2_args") |
155 | ! |
checkmate::assert_subset(names(ggplot2_args), c("default", plot_choices)) |
156 | ||
157 | ! |
checkmate::assert_numeric(plot_height, len = 3, any.missing = FALSE, finite = TRUE) |
158 | ! |
checkmate::assert_numeric(plot_height[1], lower = plot_height[2], upper = plot_height[3], .var.name = "plot_height") |
159 | ! |
checkmate::assert_numeric(plot_width, len = 3, any.missing = FALSE, null.ok = TRUE, finite = TRUE) |
160 | ! |
checkmate::assert_numeric( |
161 | ! |
plot_width[1], |
162 | ! |
lower = plot_width[2], upper = plot_width[3], null.ok = TRUE, .var.name = "plot_width" |
163 |
) |
|
164 | ||
165 | ! |
checkmate::assert_multi_class(pre_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE) |
166 | ! |
checkmate::assert_multi_class(post_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE) |
167 |
# End of assertions |
|
168 | ||
169 |
# Make UI args |
|
170 | ! |
args <- as.list(environment()) |
171 | ||
172 | ! |
data_extract_list <- list( |
173 | ! |
outlier_var = outlier_var, |
174 | ! |
categorical_var = categorical_var |
175 |
) |
|
176 | ||
177 | ! |
ans <- module( |
178 | ! |
label = label, |
179 | ! |
server = srv_outliers, |
180 | ! |
server_args = c( |
181 | ! |
data_extract_list, |
182 | ! |
list(plot_height = plot_height, plot_width = plot_width, ggplot2_args = ggplot2_args) |
183 |
), |
|
184 | ! |
ui = ui_outliers, |
185 | ! |
ui_args = args, |
186 | ! |
datanames = teal.transform::get_extract_datanames(data_extract_list) |
187 |
) |
|
188 | ! |
attr(ans, "teal_bookmarkable") <- TRUE |
189 | ! |
ans |
190 |
} |
|
191 | ||
192 |
# UI function for the outliers module |
|
193 |
ui_outliers <- function(id, ...) { |
|
194 | ! |
args <- list(...) |
195 | ! |
ns <- NS(id) |
196 | ! |
is_single_dataset_value <- teal.transform::is_single_dataset(args$outlier_var, args$categorical_var) |
197 | ||
198 | ! |
teal.widgets::standard_layout( |
199 | ! |
output = teal.widgets::white_small_well( |
200 | ! |
uiOutput(ns("total_outliers")), |
201 | ! |
DT::dataTableOutput(ns("summary_table")), |
202 | ! |
uiOutput(ns("total_missing")), |
203 | ! |
tags$br(), tags$hr(), |
204 | ! |
tabsetPanel( |
205 | ! |
id = ns("tabs"), |
206 | ! |
tabPanel( |
207 | ! |
"Boxplot", |
208 | ! |
teal.widgets::plot_with_settings_ui(id = ns("box_plot")) |
209 |
), |
|
210 | ! |
tabPanel( |
211 | ! |
"Density Plot", |
212 | ! |
teal.widgets::plot_with_settings_ui(id = ns("density_plot")) |
213 |
), |
|
214 | ! |
tabPanel( |
215 | ! |
"Cumulative Distribution Plot", |
216 | ! |
teal.widgets::plot_with_settings_ui(id = ns("cum_density_plot")) |
217 |
) |
|
218 |
), |
|
219 | ! |
tags$br(), tags$hr(), |
220 | ! |
uiOutput(ns("table_ui_wrap")), |
221 | ! |
DT::dataTableOutput(ns("table_ui")) |
222 |
), |
|
223 | ! |
encoding = tags$div( |
224 |
### Reporter |
|
225 | ! |
teal.reporter::simple_reporter_ui(ns("simple_reporter")), |
226 |
### |
|
227 | ! |
tags$label("Encodings", class = "text-primary"), |
228 | ! |
teal.transform::datanames_input(args[c("outlier_var", "categorical_var")]), |
229 | ! |
teal.transform::data_extract_ui( |
230 | ! |
id = ns("outlier_var"), |
231 | ! |
label = "Variable", |
232 | ! |
data_extract_spec = args$outlier_var, |
233 | ! |
is_single_dataset = is_single_dataset_value |
234 |
), |
|
235 | ! |
if (!is.null(args$categorical_var)) { |
236 | ! |
teal.transform::data_extract_ui( |
237 | ! |
id = ns("categorical_var"), |
238 | ! |
label = "Categorical factor", |
239 | ! |
data_extract_spec = args$categorical_var, |
240 | ! |
is_single_dataset = is_single_dataset_value |
241 |
) |
|
242 |
}, |
|
243 | ! |
conditionalPanel( |
244 | ! |
condition = paste0("input['", ns("tabs"), "'] == 'Boxplot'"), |
245 | ! |
teal.widgets::optionalSelectInput( |
246 | ! |
inputId = ns("boxplot_alts"), |
247 | ! |
label = "Plot type", |
248 | ! |
choices = c("Box plot", "Violin plot"), |
249 | ! |
selected = "Box plot", |
250 | ! |
multiple = FALSE |
251 |
) |
|
252 |
), |
|
253 | ! |
shinyjs::hidden(checkboxInput(ns("split_outliers"), "Define outliers based on group splitting", value = FALSE)), |
254 | ! |
shinyjs::hidden(checkboxInput(ns("order_by_outlier"), "Re-order categories by outliers [by %]", value = FALSE)), |
255 | ! |
teal.widgets::panel_group( |
256 | ! |
teal.widgets::panel_item( |
257 | ! |
title = "Method parameters", |
258 | ! |
collapsed = FALSE, |
259 | ! |
teal.widgets::optionalSelectInput( |
260 | ! |
inputId = ns("method"), |
261 | ! |
label = "Method", |
262 | ! |
choices = c("IQR", "Z-score", "Percentile"), |
263 | ! |
selected = "IQR", |
264 | ! |
multiple = FALSE |
265 |
), |
|
266 | ! |
conditionalPanel( |
267 | ! |
condition = |
268 | ! |
paste0("input['", ns("method"), "'] == 'IQR'"), |
269 | ! |
sliderInput( |
270 | ! |
ns("iqr_slider"), |
271 | ! |
"Outlier range:", |
272 | ! |
min = 1, |
273 | ! |
max = 5, |
274 | ! |
value = 3, |
275 | ! |
step = 0.5 |
276 |
) |
|
277 |
), |
|
278 | ! |
conditionalPanel( |
279 | ! |
condition = |
280 | ! |
paste0("input['", ns("method"), "'] == 'Z-score'"), |
281 | ! |
sliderInput( |
282 | ! |
ns("zscore_slider"), |
283 | ! |
"Outlier range:", |
284 | ! |
min = 1, |
285 | ! |
max = 5, |
286 | ! |
value = 3, |
287 | ! |
step = 0.5 |
288 |
) |
|
289 |
), |
|
290 | ! |
conditionalPanel( |
291 | ! |
condition = |
292 | ! |
paste0("input['", ns("method"), "'] == 'Percentile'"), |
293 | ! |
sliderInput( |
294 | ! |
ns("percentile_slider"), |
295 | ! |
"Outlier range:", |
296 | ! |
min = 0.001, |
297 | ! |
max = 0.5, |
298 | ! |
value = 0.01, |
299 | ! |
step = 0.001 |
300 |
) |
|
301 |
), |
|
302 | ! |
uiOutput(ns("ui_outlier_help")) |
303 |
) |
|
304 |
), |
|
305 | ! |
teal.widgets::panel_item( |
306 | ! |
title = "Plot settings", |
307 | ! |
selectInput( |
308 | ! |
inputId = ns("ggtheme"), |
309 | ! |
label = "Theme (by ggplot):", |
310 | ! |
choices = ggplot_themes, |
311 | ! |
selected = args$ggtheme, |
312 | ! |
multiple = FALSE |
313 |
) |
|
314 |
) |
|
315 |
), |
|
316 | ! |
forms = tagList( |
317 | ! |
teal.widgets::verbatim_popup_ui(ns("rcode"), "Show R code") |
318 |
), |
|
319 | ! |
pre_output = args$pre_output, |
320 | ! |
post_output = args$post_output |
321 |
) |
|
322 |
} |
|
323 | ||
324 |
# Server function for the outliers module |
|
325 |
srv_outliers <- function(id, data, reporter, filter_panel_api, outlier_var, |
|
326 |
categorical_var, plot_height, plot_width, ggplot2_args) { |
|
327 | ! |
with_reporter <- !missing(reporter) && inherits(reporter, "Reporter") |
328 | ! |
with_filter <- !missing(filter_panel_api) && inherits(filter_panel_api, "FilterPanelAPI") |
329 | ! |
checkmate::assert_class(data, "reactive") |
330 | ! |
checkmate::assert_class(isolate(data()), "teal_data") |
331 | ! |
moduleServer(id, function(input, output, session) { |
332 | ! |
teal.logger::log_shiny_input_changes(input, namespace = "teal.modules.general") |
333 | ||
334 | ! |
ns <- session$ns |
335 | ||
336 | ! |
vars <- list(outlier_var = outlier_var, categorical_var = categorical_var) |
337 | ||
338 | ! |
rule_diff <- function(other) { |
339 | ! |
function(value) { |
340 | ! |
othervalue <- tryCatch(selector_list()[[other]]()[["select"]], error = function(e) NULL) |
341 | ! |
if (!is.null(othervalue) && identical(othervalue, value)) { |
342 | ! |
"`Variable` and `Categorical factor` cannot be the same" |
343 |
} |
|
344 |
} |
|
345 |
} |
|
346 | ||
347 | ! |
selector_list <- teal.transform::data_extract_multiple_srv( |
348 | ! |
data_extract = vars, |
349 | ! |
datasets = data, |
350 | ! |
select_validation_rule = list( |
351 | ! |
outlier_var = shinyvalidate::compose_rules( |
352 | ! |
shinyvalidate::sv_required("Please select a variable"), |
353 | ! |
rule_diff("categorical_var") |
354 |
), |
|
355 | ! |
categorical_var = rule_diff("outlier_var") |
356 |
) |
|
357 |
) |
|
358 | ||
359 | ! |
iv_r <- reactive({ |
360 | ! |
iv <- shinyvalidate::InputValidator$new() |
361 | ! |
iv$add_rule("method", shinyvalidate::sv_required("Please select a method")) |
362 | ! |
iv$add_rule("boxplot_alts", shinyvalidate::sv_required("Please select Plot Type")) |
363 | ! |
teal.transform::compose_and_enable_validators(iv, selector_list) |
364 |
}) |
|
365 | ||
366 | ! |
reactive_select_input <- reactive({ |
367 | ! |
if (is.null(selector_list()$categorical_var) || length(selector_list()$categorical_var()$select) == 0) { |
368 | ! |
selector_list()[names(selector_list()) != "categorical_var"] |
369 |
} else { |
|
370 | ! |
selector_list() |
371 |
} |
|
372 |
}) |
|
373 | ||
374 | ! |
anl_merged_input <- teal.transform::merge_expression_srv( |
375 | ! |
selector_list = reactive_select_input, |
376 | ! |
datasets = data, |
377 | ! |
merge_function = "dplyr::inner_join" |
378 |
) |
|
379 | ||
380 | ! |
anl_merged_q <- reactive({ |
381 | ! |
req(anl_merged_input()) |
382 | ! |
data() %>% |
383 | ! |
teal.code::eval_code(as.expression(anl_merged_input()$expr)) |
384 |
}) |
|
385 | ||
386 | ! |
merged <- list( |
387 | ! |
anl_input_r = anl_merged_input, |
388 | ! |
anl_q_r = anl_merged_q |
389 |
) |
|
390 | ||
391 | ! |
n_outlier_missing <- reactive({ |
392 | ! |
req(iv_r()$is_valid()) |
393 | ! |
outlier_var <- as.vector(merged$anl_input_r()$columns_source$outlier_var) |
394 | ! |
ANL <- merged$anl_q_r()[["ANL"]] |
395 | ! |
sum(is.na(ANL[[outlier_var]])) |
396 |
}) |
|
397 | ||
398 |
# Used to create outlier table and the dropdown with additional columns |
|
399 | ! |
dataname_first <- isolate(teal.data::datanames(data())[[1]]) |
400 | ||
401 | ! |
common_code_q <- reactive({ |
402 | ! |
req(iv_r()$is_valid()) |
403 | ||
404 | ! |
ANL <- merged$anl_q_r()[["ANL"]] |
405 | ! |
qenv <- merged$anl_q_r() |
406 | ||
407 | ! |
outlier_var <- as.vector(merged$anl_input_r()$columns_source$outlier_var) |
408 | ! |
categorical_var <- as.vector(merged$anl_input_r()$columns_source$categorical_var) |
409 | ! |
order_by_outlier <- input$order_by_outlier |
410 | ! |
method <- input$method |
411 | ! |
split_outliers <- input$split_outliers |
412 | ! |
teal::validate_has_data( |
413 |
# missing values in the categorical variable may be used to form a category of its own |
|
414 | ! |
`if`( |
415 | ! |
length(categorical_var) == 0, |
416 | ! |
ANL, |
417 | ! |
ANL[, names(ANL) != categorical_var, drop = FALSE] |
418 |
), |
|
419 | ! |
min_nrow = 10, |
420 | ! |
complete = TRUE, |
421 | ! |
allow_inf = FALSE |
422 |
) |
|
423 | ! |
validate(need(is.numeric(ANL[[outlier_var]]), "`Variable` is not numeric")) |
424 | ! |
validate(need(length(unique(ANL[[outlier_var]])) > 1, "Variable has no variation, i.e. only one unique value")) |
425 | ||
426 |
# show/hide split_outliers |
|
427 | ! |
if (length(categorical_var) == 0) { |
428 | ! |
shinyjs::hide("split_outliers") |
429 | ! |
if (n_outlier_missing() > 0) { |
430 | ! |
qenv <- teal.code::eval_code( |
431 | ! |
qenv, |
432 | ! |
substitute( |
433 | ! |
expr = ANL <- ANL %>% dplyr::filter(!is.na(outlier_var_name)), |
434 | ! |
env = list(outlier_var_name = as.name(outlier_var)) |
435 |
) |
|
436 |
) |
|
437 |
} |
|
438 |
} else { |
|
439 | ! |
validate(need( |
440 | ! |
is.factor(ANL[[categorical_var]]) || |
441 | ! |
is.character(ANL[[categorical_var]]) || |
442 | ! |
is.integer(ANL[[categorical_var]]), |
443 | ! |
"`Categorical factor` must be `factor`, `character`, or `integer`" |
444 |
)) |
|
445 | ||
446 | ! |
if (n_outlier_missing() > 0) { |
447 | ! |
qenv <- teal.code::eval_code( |
448 | ! |
qenv, |
449 | ! |
substitute( |
450 | ! |
expr = ANL <- ANL %>% dplyr::filter(!is.na(outlier_var_name)), |
451 | ! |
env = list(outlier_var_name = as.name(outlier_var)) |
452 |
) |
|
453 |
) |
|
454 |
} |
|
455 | ! |
shinyjs::show("split_outliers") |
456 |
} |
|
457 | ||
458 |
# slider |
|
459 | ! |
outlier_definition_param <- if (method == "IQR") { |
460 | ! |
input$iqr_slider |
461 | ! |
} else if (method == "Z-score") { |
462 | ! |
input$zscore_slider |
463 | ! |
} else if (method == "Percentile") { |
464 | ! |
input$percentile_slider |
465 |
} |
|
466 | ||
467 |
# this is utils function that converts a %>% NULL %>% b into a %>% b |
|
468 | ! |
remove_pipe_null <- function(x) { |
469 | ! |
if (length(x) == 1) { |
470 | ! |
return(x) |
471 |
} |
|
472 | ! |
if (identical(x[[1]], as.name("%>%")) && is.null(x[[3]])) { |
473 | ! |
return(remove_pipe_null(x[[2]])) |
474 |
} |
|
475 | ! |
return(as.call(c(x[[1]], lapply(x[-1], remove_pipe_null)))) |
476 |
} |
|
477 | ||
478 | ! |
qenv <- teal.code::eval_code( |
479 | ! |
qenv, |
480 | ! |
substitute( |
481 | ! |
expr = { |
482 | ! |
ANL_OUTLIER <- ANL %>% |
483 | ! |
group_expr %>% # styler: off |
484 | ! |
dplyr::mutate(is_outlier = { |
485 | ! |
q1_q3 <- stats::quantile(outlier_var_name, probs = c(0.25, 0.75)) |
486 | ! |
iqr <- q1_q3[2] - q1_q3[1] |
487 | ! |
!(outlier_var_name >= q1_q3[1] - 1.5 * iqr & outlier_var_name <= q1_q3[2] + 1.5 * iqr) |
488 |
}) %>% |
|
489 | ! |
calculate_outliers %>% # styler: off |
490 | ! |
ungroup_expr %>% # styler: off |
491 | ! |
dplyr::filter(is_outlier | is_outlier_selected) %>% |
492 | ! |
dplyr::select(-is_outlier) |
493 |
}, |
|
494 | ! |
env = list( |
495 | ! |
calculate_outliers = if (method == "IQR") { |
496 | ! |
substitute( |
497 | ! |
expr = dplyr::mutate(is_outlier_selected = { |
498 | ! |
q1_q3 <- stats::quantile(outlier_var_name, probs = c(0.25, 0.75)) |
499 | ! |
iqr <- q1_q3[2] - q1_q3[1] |
500 |
!( |
|
501 | ! |
outlier_var_name >= q1_q3[1] - outlier_definition_param * iqr & |
502 | ! |
outlier_var_name <= q1_q3[2] + outlier_definition_param * iqr |
503 |
) |
|
504 |
}), |
|
505 | ! |
env = list( |
506 | ! |
outlier_var_name = as.name(outlier_var), |
507 | ! |
outlier_definition_param = outlier_definition_param |
508 |
) |
|
509 |
) |
|
510 | ! |
} else if (method == "Z-score") { |
511 | ! |
substitute( |
512 | ! |
expr = dplyr::mutate( |
513 | ! |
is_outlier_selected = abs(outlier_var_name - mean(outlier_var_name)) / |
514 | ! |
stats::sd(outlier_var_name) > outlier_definition_param |
515 |
), |
|
516 | ! |
env = list( |
517 | ! |
outlier_var_name = as.name(outlier_var), |
518 | ! |
outlier_definition_param = outlier_definition_param |
519 |
) |
|
520 |
) |
|
521 | ! |
} else if (method == "Percentile") { |
522 | ! |
substitute( |
523 | ! |
expr = dplyr::mutate( |
524 | ! |
is_outlier_selected = outlier_var_name < stats::quantile(outlier_var_name, outlier_definition_param) | |
525 | ! |
outlier_var_name > stats::quantile(outlier_var_name, 1 - outlier_definition_param) |
526 |
), |
|
527 | ! |
env = list( |
528 | ! |
outlier_var_name = as.name(outlier_var), |
529 | ! |
outlier_definition_param = outlier_definition_param |
530 |
) |
|
531 |
) |
|
532 |
}, |
|
533 | ! |
outlier_var_name = as.name(outlier_var), |
534 | ! |
group_expr = if (isTRUE(split_outliers) && length(categorical_var) != 0) { |
535 | ! |
substitute(dplyr::group_by(x), list(x = as.name(categorical_var))) |
536 |
}, |
|
537 | ! |
ungroup_expr = if (isTRUE(split_outliers) && length(categorical_var) != 0) { |
538 | ! |
substitute(dplyr::ungroup()) |
539 |
} |
|
540 |
) |
|
541 |
) %>% |
|
542 | ! |
remove_pipe_null() |
543 |
) |
|
544 | ||
545 |
# ANL_OUTLIER_EXTENDED is the base table |
|
546 | ! |
qenv <- teal.code::eval_code( |
547 | ! |
qenv, |
548 | ! |
substitute( |
549 | ! |
expr = { |
550 | ! |
ANL_OUTLIER_EXTENDED <- dplyr::left_join( |
551 | ! |
ANL_OUTLIER, |
552 | ! |
dplyr::select( |
553 | ! |
dataname, |
554 | ! |
dplyr::setdiff(names(dataname), dplyr::setdiff(names(ANL_OUTLIER), join_keys)) |
555 |
), |
|
556 | ! |
by = join_keys |
557 |
) |
|
558 |
}, |
|
559 | ! |
env = list( |
560 | ! |
dataname = as.name(dataname_first), |
561 | ! |
join_keys = as.character(teal.data::join_keys(data())[dataname_first, dataname_first]) |
562 |
) |
|
563 |
) |
|
564 |
) |
|
565 | ||
566 | ! |
if (length(categorical_var) > 0) { |
567 | ! |
qenv <- teal.code::eval_code( |
568 | ! |
qenv, |
569 | ! |
substitute( |
570 | ! |
expr = summary_table_pre <- ANL_OUTLIER %>% |
571 | ! |
dplyr::filter(is_outlier_selected) %>% |
572 | ! |
dplyr::select(outlier_var_name, categorical_var_name) %>% |
573 | ! |
dplyr::group_by(categorical_var_name) %>% |
574 | ! |
dplyr::summarise(n_outliers = dplyr::n()) %>% |
575 | ! |
dplyr::right_join( |
576 | ! |
ANL %>% |
577 | ! |
dplyr::select(outlier_var_name, categorical_var_name) %>% |
578 | ! |
dplyr::group_by(categorical_var_name) %>% |
579 | ! |
dplyr::summarise( |
580 | ! |
total_in_cat = dplyr::n(), |
581 | ! |
n_na = sum(is.na(outlier_var_name) | is.na(categorical_var_name)) |
582 |
), |
|
583 | ! |
by = categorical_var |
584 |
) %>% |
|
585 |
# This is important as there may be categorical variables with natural orderings, e.g. AGE. |
|
586 |
# The plots should be displayed by default in increasing order in these situations. |
|
587 |
# dplyr::arrange will sort integer, factor, and character data types in the expected way. |
|
588 | ! |
dplyr::arrange(categorical_var_name) %>% |
589 | ! |
dplyr::mutate( |
590 | ! |
n_outliers = dplyr::if_else(is.na(n_outliers), 0, as.numeric(n_outliers)), |
591 | ! |
display_str = dplyr::if_else( |
592 | ! |
n_outliers > 0, |
593 | ! |
sprintf("%d [%.02f%%]", n_outliers, 100 * n_outliers / total_in_cat), |
594 | ! |
"0" |
595 |
), |
|
596 | ! |
display_str_na = dplyr::if_else( |
597 | ! |
n_na > 0, |
598 | ! |
sprintf("%d [%.02f%%]", n_na, 100 * n_na / total_in_cat), |
599 | ! |
"0" |
600 |
), |
|
601 | ! |
order = seq_along(n_outliers) |
602 |
), |
|
603 | ! |
env = list( |
604 | ! |
categorical_var = categorical_var, |
605 | ! |
categorical_var_name = as.name(categorical_var), |
606 | ! |
outlier_var_name = as.name(outlier_var) |
607 |
) |
|
608 |
) |
|
609 |
) |
|
610 |
# now to handle when user chooses to order based on amount of outliers |
|
611 | ! |
if (order_by_outlier) { |
612 | ! |
qenv <- teal.code::eval_code( |
613 | ! |
qenv, |
614 | ! |
quote( |
615 | ! |
summary_table_pre <- summary_table_pre %>% |
616 | ! |
dplyr::arrange(desc(n_outliers / total_in_cat)) %>% |
617 | ! |
dplyr::mutate(order = seq_len(nrow(summary_table_pre))) |
618 |
) |
|
619 |
) |
|
620 |
} |
|
621 | ||
622 | ! |
qenv <- teal.code::eval_code( |
623 | ! |
qenv, |
624 | ! |
substitute( |
625 | ! |
expr = { |
626 |
# In order for geom_rug to work properly when reordering takes place inside facet_grid, |
|
627 |
# all tables must have the column used for reording. |
|
628 |
# In this case, the column used for reordering is `order`. |
|
629 | ! |
ANL_OUTLIER <- dplyr::left_join( |
630 | ! |
ANL_OUTLIER, |
631 | ! |
summary_table_pre[, c("order", categorical_var)], |
632 | ! |
by = categorical_var |
633 |
) |
|
634 |
# so that x axis of plot aligns with columns of summary table, from most outliers to least by percentage |
|
635 | ! |
ANL <- ANL %>% |
636 | ! |
dplyr::left_join( |
637 | ! |
dplyr::select(summary_table_pre, categorical_var_name, order), |
638 | ! |
by = categorical_var |
639 |
) %>% |
|
640 | ! |
dplyr::arrange(order) |
641 | ! |
summary_table <- summary_table_pre %>% |
642 | ! |
dplyr::select( |
643 | ! |
categorical_var_name, |
644 | ! |
Outliers = display_str, Missings = display_str_na, Total = total_in_cat |
645 |
) %>% |
|
646 | ! |
dplyr::mutate_all(as.character) %>% |
647 | ! |
tidyr::pivot_longer(-categorical_var_name) %>% |
648 | ! |
tidyr::pivot_wider(names_from = categorical_var, values_from = value) %>% |
649 | ! |
tibble::column_to_rownames("name") |
650 | ! |
summary_table |
651 |
}, |
|
652 | ! |
env = list( |
653 | ! |
categorical_var = categorical_var, |
654 | ! |
categorical_var_name = as.name(categorical_var) |
655 |
) |
|
656 |
) |
|
657 |
) |
|
658 |
} |
|
659 | ||
660 | ! |
if (length(categorical_var) > 0 && nrow(qenv[["ANL_OUTLIER"]]) > 0) { |
661 | ! |
shinyjs::show("order_by_outlier") |
662 |
} else { |
|
663 | ! |
shinyjs::hide("order_by_outlier") |
664 |
} |
|
665 | ||
666 | ! |
qenv |
667 |
}) |
|
668 | ||
669 | ! |
output$summary_table <- DT::renderDataTable( |
670 | ! |
expr = { |
671 | ! |
if (iv_r()$is_valid()) { |
672 | ! |
categorical_var <- as.vector(merged$anl_input_r()$columns_source$categorical_var) |
673 | ! |
if (!is.null(categorical_var)) { |
674 | ! |
DT::datatable( |
675 | ! |
common_code_q()[["summary_table"]], |
676 | ! |
options = list( |
677 | ! |
dom = "t", |
678 | ! |
autoWidth = TRUE, |
679 | ! |
columnDefs = list(list(width = "200px", targets = "_all")) |
680 |
) |
|
681 |
) |
|
682 |
} |
|
683 |
} |
|
684 |
} |
|
685 |
) |
|
686 | ||
687 |
# boxplot/violinplot # nolint commented_code |
|
688 | ! |
boxplot_q <- reactive({ |
689 | ! |
req(common_code_q()) |
690 | ! |
ANL <- common_code_q()[["ANL"]] |
691 | ! |
ANL_OUTLIER <- common_code_q()[["ANL_OUTLIER"]] |
692 | ||
693 | ! |
outlier_var <- as.vector(merged$anl_input_r()$columns_source$outlier_var) |
694 | ! |
categorical_var <- as.vector(merged$anl_input_r()$columns_source$categorical_var) |
695 | ||
696 |
# validation |
|
697 | ! |
teal::validate_has_data(ANL, 1) |
698 | ||
699 |
# boxplot |
|
700 | ! |
plot_call <- quote(ANL %>% ggplot()) |
701 | ||
702 | ! |
plot_call <- if (input$boxplot_alts == "Box plot") { |
703 | ! |
substitute(expr = plot_call + geom_boxplot(outlier.shape = NA), env = list(plot_call = plot_call)) |
704 | ! |
} else if (input$boxplot_alts == "Violin plot") { |
705 | ! |
substitute(expr = plot_call + geom_violin(), env = list(plot_call = plot_call)) |
706 |
} else { |
|
707 | ! |
NULL |
708 |
} |
|
709 | ||
710 | ! |
plot_call <- if (identical(categorical_var, character(0)) || is.null(categorical_var)) { |
711 | ! |
inner_call <- substitute( |
712 | ! |
expr = plot_call + |
713 | ! |
aes(x = "Entire dataset", y = outlier_var_name) + |
714 | ! |
scale_x_discrete(), |
715 | ! |
env = list(plot_call = plot_call, outlier_var_name = as.name(outlier_var)) |
716 |
) |
|
717 | ! |
if (nrow(ANL_OUTLIER) > 0) { |
718 | ! |
substitute( |
719 | ! |
expr = inner_call + geom_point( |
720 | ! |
data = ANL_OUTLIER, |
721 | ! |
aes(x = "Entire dataset", y = outlier_var_name, color = is_outlier_selected) |
722 |
), |
|
723 | ! |
env = list(inner_call = inner_call, outlier_var_name = as.name(outlier_var)) |
724 |
) |
|
725 |
} else { |
|
726 | ! |
inner_call |
727 |
} |
|
728 |
} else { |
|
729 | ! |
substitute( |
730 | ! |
expr = plot_call + |
731 | ! |
aes(y = outlier_var_name, x = reorder(categorical_var_name, order)) + |
732 | ! |
xlab(categorical_var) + |
733 | ! |
scale_x_discrete() + |
734 | ! |
geom_point( |
735 | ! |
data = ANL_OUTLIER, |
736 | ! |
aes(x = as.factor(categorical_var_name), y = outlier_var_name, color = is_outlier_selected) |
737 |