Skip to contents

Additional assertion functions which can be used together with the checkmate package.

Usage

assert_list_of_variables(x, .var.name = checkmate::vname(x), add = NULL)

assert_df_with_variables(
  df,
  variables,
  na_level = NULL,
  .var.name = checkmate::vname(df),
  add = NULL
)

assert_valid_factor(
  x,
  min.levels = 1,
  max.levels = NULL,
  null.ok = TRUE,
  any.missing = TRUE,
  n.levels = NULL,
  len = NULL,
  .var.name = checkmate::vname(x),
  add = NULL
)

assert_df_with_factors(
  df,
  variables,
  min.levels = 1,
  max.levels = NULL,
  any.missing = TRUE,
  na_level = NULL,
  .var.name = checkmate::vname(df),
  add = NULL
)

assert_proportion_value(x, include_boundaries = FALSE)

Arguments

x

(any)
object to test.

.var.name

[character(1)]
Name of the checked object to print in assertions. Defaults to the heuristic implemented in vname.

add

[AssertCollection]
Collection to store assertion messages. See AssertCollection.

df

(data frame)
data set to test.

variables

(named list of character)
list of variables to test.

na_level

(character)
the string you have been using to represent NA or missing data. For NA values, please consider using base::is.na directly, or a similar approach.

min.levels

[integer(1)]
Minimum number of factor levels. Default is 1.

max.levels

[integer(1)]
Maximum number of factor levels. Default is NULL (no check).

null.ok

[logical(1)]
If set to TRUE, x may also be NULL. In this case only a type check of x is performed; all additional checks are disabled.

any.missing

[logical(1)]
Are vectors with missing values allowed? Default is TRUE.

n.levels

[integer(1)]
Exact number of factor levels. Default is NULL (no check).

len

[integer(1)]
Exact expected length of x.

include_boundaries

(logical)
whether to include boundaries when testing for proportions.

min.levels

(integer)
minimum number of factor levels. Default is 1.

...

a collection of objects to test.

Functions

  • assert_list_of_variables(): Check whether x is a valid list of variable names. NULL elements of the list x are dropped with Filter(Negate(is.null), x).

  • assert_df_with_variables(): Check whether df is a data frame with the analysis variables. Note that this produces an error when not all variables are present in the data frame, whereas the data frame may contain additional columns that are not listed in variables.

  • assert_valid_factor(): Check whether x is a valid factor (has levels and no empty string levels). Note that NULL and NA elements are allowed.

  • assert_df_with_factors(): Check whether df is a data frame where the analysis variables are all factors. Note that when a factor is created by a direct call to factor(), NA values are not included as a level of the resulting factor.

  • assert_proportion_value(): Check whether x is a proportion, i.e. a number between 0 and 1. The boundary values 0 and 1 are only accepted when include_boundaries = TRUE.

Examples

# Check whether `x` is a valid list of variable names.

# Internal function - assert_list_of_variables
if (FALSE) {
assert_list_of_variables(list(val = "a"))
assert_list_of_variables(list(val = c("a", "b")))
assert_list_of_variables(list(val = c("a", "b"), val2 = NULL))

# The following calls fail
assert_list_of_variables(list(1, 2))
assert_list_of_variables(list("bla" = 2))
}

# Check whether `df` contains the analysis `variables`.

# Internal function - assert_df_with_variables
if (FALSE) {
assert_df_with_variables(
  df = data.frame(a = 5, b = 3),
  variables = list(val = "a")
)
assert_df_with_variables(
  df = data.frame(a = 5, b = 3),
  variables = list(val = c("a", "b"))
)
assert_df_with_variables(
  df = data.frame(a = 5, b = 3),
  variables = list(val = c("a", "b"))
)
assert_df_with_variables(
  df = data.frame(a = 5, b = 3, e = "<Missing>"),
  variables = list(val = c("a", "b")), na_level = "<Missing>"
)

# The following calls fail
assert_df_with_variables(
  df = matrix(1:5, ncol = 2, nrow = 3),
  variables = list(val = "a")
)
assert_df_with_variables(
  df = data.frame(a = 5, b = 3),
  variables = list(val = c("a", "b", "c"))
)
assert_df_with_variables(
  df = data.frame(a = 5, b = 3, e = "<Missing>"),
  variables = list(val = c("a", "b", "e")), na_level = "<Missing>"
)
}

# Check whether `x` is a valid factor.

# Internal function - assert_valid_factor
if (FALSE) {
assert_valid_factor(factor(c("a", NULL)))
assert_valid_factor(factor(c("a", "b")))
assert_valid_factor(factor(c("a", "b")), len = 2)
assert_valid_factor(factor(c("a", NA)), any.missing = TRUE)
assert_valid_factor(factor("A", levels = c("A", "B")))

# The following calls fail
assert_valid_factor(-1)
assert_valid_factor(factor(c("a", "")))
assert_valid_factor(factor(c("a", NA)), any.missing = FALSE)
assert_valid_factor(factor(NULL))
assert_valid_factor(factor(c(NULL, "")))
assert_valid_factor(factor())
}

# Check whether `df` contains all factor analysis `variables`.
adf <- data.frame(a = factor(c("A", "B")), b = 3)
bdf <- data.frame(a = factor(letters[1:3]), b = factor(c(1, 2, 3)), d = 3)

# Internal function - assert_df_with_factors
if (FALSE) {
assert_df_with_factors(df = adf, variables = list(val = "a"))
assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1)
assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 2, max.levels = 2)
assert_df_with_factors(
  df = data.frame(a = factor(c("A", NA, "B")), b = 3),
  variable = list(val = "a"),
  min.levels = 2,
  max.levels = 2
)

# The following calls fail
assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1, max.levels = 1)
assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1, max.levels = 1)
assert_df_with_factors(df = adf, variables = list(val = "a", val = "b", val = ""))
assert_df_with_factors(df = adf, variables = list(val = "a", val = "b", val = "d"))
assert_df_with_factors(
  df = bdf,
  variables = list(val = "a", val = "b"),
  min.levels = 1,
  max.levels = 1
)
}

# Check whether `x` is between 0 and 1.
# Internal function - assert_proportion_value
if (FALSE) {
assert_proportion_value(x = 0, include_boundaries = TRUE)
assert_proportion_value(x = 0.3)

# These fail
assert_proportion_value(x = 1.3)
assert_proportion_value(x = 1)
}