dunlin
dunlin provides a variety of data tools to reformat and manipulate a subset of the tables in a data set.
Installation
It is recommended that you create and use a GitHub personal access token (PAT) to install the latest version of this package. Once you have the PAT, run the following:
Sys.setenv(GITHUB_PAT = "your_access_token_here")
if (!require("remotes")) install.packages("remotes")
remotes::install_github("insightsengineering/dunlin@*release")
Usage
library(dunlin)
df1 <- data.frame(
"id" = c("a", "b", NA, "a", "k", "x"),
"id2" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
"val" = letters[1:6]
)
df2 <- data.frame(
"id" = c("a", "b", NA, "a", "k", "x"),
"id2" = factor(c("f1", "f2", NA, NA, "f1", "f1")),
"num" = 1:6
)
db <- list(df1 = df1, df2 = df2)
prop_db <- propagate(db, "df1", "val", c("id", "id2"))
This returns prop_db as:
$df1
id id2 val
1 a f1 a
2 b f2 b
3 <NA> <NA> c
4 a <NA> d
5 k f1 e
6 x f1 f
$df2
id id2 num val
1 a f1 1 a
2 b f2 2 b
3 <NA> <NA> 3 c
4 a <NA> 4 d
5 k f1 5 e
6 x f1 6 f
new_format <- list(
df1 = list(
id = rule("No ID available" = c("", NA, "<Missing>")),
id2 = rule("<Missing>" = c("", NA, "<Missing>"))
)
)
res <- dunlin::reformat(prop_db, new_format, .na_last = TRUE)
This results in res as shown below. Only df1 is listed in new_format, so df2 is left unchanged and still contains <NA>:
$df1
id id2 val
1 a f1 a
2 b f2 b
3 No ID available <Missing> c
4 a <Missing> d
5 k f1 e
6 x f1 f
$df2
id id2 num val
1 a f1 1 a
2 b f2 2 b
3 <NA> <NA> 3 c
4 a <NA> 4 d
5 k f1 5 e
6 x f1 6 f