library(httr)
library(stringr)
library(dplyr)
library(glue)
library(tibble)
library(purrr)
library(tidyr)
library(gt)
library(arrow)
library(tictoc)
Validated R packages
Context
The code and license are at github.com/insightsengineering/rvalidationhub-packages.
This document summarises validated R packages by R version as they were at 2024-07-12 09:59:30.52515.
It is important to note that R package functionality is validated via a risk-based approach, and package functionality is tested and documented against specific environments as a rolling cohort. R packages are not intended to be used outside the ‘snapshot’ they were validated within. In this repo you cannot see the environments we use, or how the package cohorts are defined temporally, so you cannot assume the packages will be stable in your environment and with a different cohort of package versions.
A small number of validated packages are not open source, usually because they are specific to our infrastructure (e.g. wrappers to simplify working against our Redshift, S3 or Snowflake instances).
We do not currently have a concept of ‘validated’ Shiny applications (we do have a business process: in the same way as study code, they are QC’d rather than ‘validated’ via a CSV process).
Output
A table is shown in the footer of this document, and a parquet file is saved in the root of this repo as validated_packages.parquet.
Code
# variables
# Base URL of the package server. It ends with "/" because the API paths
# used later are appended to it directly.
host <- "https://packages.roche.com/"
# get all repos
# Raise an R error on an HTTP error status instead of parsing an error body
# as if it were the repo list.
response <- httr::GET(paste0(host, "__api__/repos"))
httr::stop_for_status(response)
# Parsed body: a list with one element per repo (each read below by its
# "id" and "name" fields).
response <- httr::content(response)
# flatten response into a tibble
# One row per repo, holding its id, its display name and the R version parsed
# from that name.
tibble_repos <- tibble(
  id = unlist(map(response, "id")),
  name = unlist(map(response, "name"))
) |>
  # keeping only validated
  dplyr::filter(
    str_detect(name, pattern = "^Validated")
  ) |>
  # new column for R version: the first "<digits>.<digits>" found in the name
  # (NA when the name contains none)
  dplyr::mutate(
    r_version = str_extract(name, pattern = "\\d+\\.\\d+")
  ) |>
  # remove 3.10 (rows whose r_version is NA are dropped here as well, because
  # filter() only keeps rows where the condition is TRUE)
  filter(r_version != "3.10")
# get all packages for a repo
# Builds `validated_packages`: one row per (r_version, name, version) covering
# both the versions currently served by each repo and the archived versions
# reported by the per-package detail endpoint.
#
# The pieces are collected in a list and bound once after the loops, rather
# than growing a tibble with bind_rows() on every iteration. Row order is not
# meaningful at this point.
package_pieces <- list()
for (i in seq_len(nrow(tibble_repos))) {
  # take single repo
  i_row <- slice(tibble_repos, i)
  # raw list of current packages; fail on an HTTP error status rather than
  # treating an error body as a package list
  response <- httr::GET(
    glue("{host}__api__/repos/{i_row$id}/packages?_limit=1000")
  )
  httr::stop_for_status(response)
  response <- httr::content(response)
  # Check no pagination
  if (length(response) > 999) {
    stop("Pagination not implemented: 1000+ packages in a repo.")
  }
  # A repo without packages has nothing to record. Without this guard the
  # tibble below would have a single row with no `name`, and the detail
  # request would be built from an empty URL.
  if (length(response) == 0) next
  # package version live right now
  tibble_packages <- tibble(
    r_version = i_row$r_version,
    name = unlist(map(response, "name")),
    version = unlist(map(response, "version"))
  )
  package_pieces[[length(package_pieces) + 1L]] <- tibble_packages
  # get archives
  for (j in seq_len(nrow(tibble_packages))) {
    j_row <- slice(tibble_packages, j)
    # get package details
    response <- httr::content(httr::GET(
      glue("{host}__api__/repos/{i_row$id}/packages/{j_row$name}")
    ))
    # archived
    # `[[` matches the field name exactly (`$` on a list matches partially).
    # An absent or empty `archived` entry contributes no rows.
    archived <- response[["archived"]]
    if (length(archived) > 0) {
      archived_packages <- tibble(
        r_version = i_row$r_version,
        name = unlist(map(archived, "name")),
        version = unlist(map(archived, "version"))
      )
      package_pieces[[length(package_pieces) + 1L]] <- archived_packages
    }
  }
}
validated_packages <- bind_rows(package_pieces)
# Ordering
# Sort by R version, then package name, then package version, so the saved
# file has a stable row order.
validated_packages <- validated_packages |>
  arrange(
    r_version,
    name,
    version
  )
# Save copy
# Written relative to the current working directory.
write_parquet(validated_packages, "validated_packages.parquet")
# wide table
# One row per package and one column per R version. `values_fn = list` keeps
# every version of a package for a given R version together in a single
# list-cell instead of requiring one value per cell.
validated_packages |>
  tidyr::pivot_wider(
    names_from = r_version,
    values_from = version,
    values_fn = list
  ) |>
  arrange(name) |>
  gt() |>
  tab_header(
    title = "Validated R packages",
    subtitle = glue("Generated on {Sys.Date()}, grouped by R version validated against")
  ) |>
  # Render as an interactive HTML table (search, filters, resizing, paging)
  opt_interactive(
    use_search = TRUE,
    use_filters = TRUE,
    use_resizers = TRUE,
    use_highlight = TRUE,
    use_compact_mode = FALSE,
    use_text_wrapping = TRUE,
    use_page_size_select = TRUE
  )
Execution took 144.404 seconds