Rebuilding Random CDISC Cached Data
NEST team
8/17/2020
rebuild_cached_data.Rmd
Getting Started
The following script creates new cached data sets, compares them with the
existing cache, and saves them to the data/
directory.
Starting in R 3.6.0
the default method used by sample() to draw random values
was changed from "Rounding" to "Rejection". Now, in order to get the results
from set.seed()
to match those produced by earlier R versions, you have to first call the
function RNGkind(sample.kind = "Rounding")
.
This call throws the following expected warning:
Warning message:
In RNGkind(sample.kind = "Rounding") : non-uniform 'Rounding' sampler used
Code maintenance
Currently, whenever a new random.cdisc.data function is created, this script must be manually updated to include the new cached data set in the rebuild process. If an existing function is modified, the cached data must also be updated by running through the rebuild process.
Code to run interactively
library(dplyr)
library(random.cdisc.data)
library(diffdf)
# Select the "Rounding" sampler so that seeded results match those produced by
# R versions before 3.6.0. This call emits an expected warning about the
# non-uniform 'Rounding' sampler.
RNGkind(sample.kind = "Rounding")

# Regenerate every cached data set with seed 1, then stamp each result with a
# "creation date" attribute holding the date on which it was built.
# cadsl_new is built first because every other generator takes it as input.
cadsl_new <- radsl(seed = 1, na_vars = list())
attr(cadsl_new, "creation date") <- Sys.Date()

cadae_new <- radae(cadsl_new, seed = 1, na_vars = list())
attr(cadae_new, "creation date") <- Sys.Date()

cadaette_new <- radaette(cadsl_new, seed = 1, na_vars = list())
attr(cadaette_new, "creation date") <- Sys.Date()

cadcm_new <- radcm(cadsl_new, seed = 1, who_coding = TRUE, na_vars = list())
attr(cadcm_new, "creation date") <- Sys.Date()

cadex_new <- radex(cadsl_new, seed = 1, na_vars = list())
attr(cadex_new, "creation date") <- Sys.Date()

cadlb_new <- radlb(cadsl_new, seed = 1, na_vars = list())
attr(cadlb_new, "creation date") <- Sys.Date()

cadmh_new <- radmh(cadsl_new, seed = 1, na_vars = list())
attr(cadmh_new, "creation date") <- Sys.Date()

cadpc_new <- radpc(cadsl_new, seed = 1, na_vars = list())
attr(cadpc_new, "creation date") <- Sys.Date()

cadpp_new <- radpp(cadsl_new, seed = 1, na_vars = list())
attr(cadpp_new, "creation date") <- Sys.Date()

# radab additionally needs the freshly generated cadpc_new, so it must run
# after the radpc() call above.
cadab_new <- radab(cadsl_new, cadpc_new, seed = 1, na_vars = list())
attr(cadab_new, "creation date") <- Sys.Date()

cadqs_new <- radqs(cadsl_new, seed = 1, na_vars = list())
attr(cadqs_new, "creation date") <- Sys.Date()

cadrs_new <- radrs(cadsl_new, seed = 1, na_vars = list())
attr(cadrs_new, "creation date") <- Sys.Date()

cadtte_new <- radtte(cadsl_new, seed = 1, na_vars = list())
attr(cadtte_new, "creation date") <- Sys.Date()

cadvs_new <- radvs(cadsl_new, seed = 1, na_vars = list())
attr(cadvs_new, "creation date") <- Sys.Date()

cadeg_new <- radeg(cadsl_new, seed = 1, na_vars = list())
attr(cadeg_new, "creation date") <- Sys.Date()

cadtr_new <- radtr(cadsl_new, seed = 1, na_vars = list())
attr(cadtr_new, "creation date") <- Sys.Date()

caddv_new <- raddv(cadsl_new, seed = 1, na_vars = list())
attr(caddv_new, "creation date") <- Sys.Date()

cadsub_new <- radsub(cadsl_new, seed = 1, na_vars = list())
attr(cadsub_new, "creation date") <- Sys.Date()

# radhy() and radqlqc() are called without an na_vars argument.
cadhy_new <- radhy(cadsl_new, seed = 1)
attr(cadhy_new, "creation date") <- Sys.Date()

cadqlqc_new <- radqlqc(cadsl_new, percent = 80, number = 2, seed = 1)
attr(cadqlqc_new, "creation date") <- Sys.Date()
# Compare each currently cached data set (base) against its regenerated
# counterpart (compare) with diffdf::diffdf(). Each call is left at top level
# so its report is printed; review every report before updating the cache.
diffdf(base = cadsl, compare = cadsl_new)
diffdf(base = cadab, compare = cadab_new)
diffdf(base = cadae, compare = cadae_new)
diffdf(base = cadaette, compare = cadaette_new)
diffdf(base = cadcm, compare = cadcm_new)
diffdf(base = cadex, compare = cadex_new)
diffdf(base = cadlb, compare = cadlb_new)
diffdf(base = cadmh, compare = cadmh_new)
diffdf(base = cadpc, compare = cadpc_new)
diffdf(base = cadpp, compare = cadpp_new)
diffdf(base = cadqs, compare = cadqs_new)
diffdf(base = cadrs, compare = cadrs_new)
diffdf(base = cadtte, compare = cadtte_new)
diffdf(base = cadvs, compare = cadvs_new)
diffdf(base = cadeg, compare = cadeg_new)
diffdf(base = cadtr, compare = cadtr_new)
diffdf(base = caddv, compare = caddv_new)
diffdf(base = cadsub, compare = cadsub_new)
diffdf(base = cadhy, compare = cadhy_new)
diffdf(base = cadqlqc, compare = cadqlqc_new)
# Once the diffs reflect the intended changes, bind each regenerated data set
# to its cache name in the current environment so that it can be saved under
# that name.
invisible(list2env(
  list(
    cadsl = cadsl_new,
    cadab = cadab_new,
    cadae = cadae_new,
    cadaette = cadaette_new,
    cadcm = cadcm_new,
    cadeg = cadeg_new,
    cadex = cadex_new,
    cadlb = cadlb_new,
    cadmh = cadmh_new,
    cadpc = cadpc_new,
    cadpp = cadpp_new,
    cadqs = cadqs_new,
    cadrs = cadrs_new,
    cadtr = cadtr_new,
    cadtte = cadtte_new,
    cadvs = cadvs_new,
    caddv = caddv_new,
    cadsub = cadsub_new,
    cadhy = cadhy_new,
    cadqlqc = cadqlqc_new
  ),
  envir = environment()
))
# Write each cached data set to data/<name>.RData using xz compression.
# The paths are relative, so the working directory must be the package root.
for (dataset_name in c(
  "cadsl", "cadab", "cadae", "cadaette", "cadcm",
  "cadeg", "cadex", "cadlb", "cadmh", "cadpc",
  "cadpp", "cadqs", "cadrs", "cadtr", "cadtte",
  "cadvs", "caddv", "cadsub", "cadhy", "cadqlqc"
)) {
  save(
    list = dataset_name,
    file = file.path("data", paste0(dataset_name, ".RData")),
    compress = "xz"
  )
}
# Drop the loop variable so that no extra object is left in the session.
rm(dataset_name)