Skip to content

Commit

Permalink
Bug fixes
Browse files Browse the repository at this point in the history
See NEWS.md for details.
  • Loading branch information
mingdeyu committed Jan 21, 2025
1 parent b119249 commit 16501d5
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 142 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# dgpsi 2.5.0-9000 (development version)
- Prediction speed with `predict()` enhanced for small testing data sets by reducing overhead caused by the multi-threading implementation.
- The Python environment now installs packages exclusively from conda-forge whenever possible. Packages from other channels will only be used if they are unavailable on conda-forge.
- A bug in `vigf()`, affecting a bundle of emulators that includes GP emulators, has now been fixed.
- The column names from the training input and output provided to `gp()` and `dgp()` are retained in the relevant slots of the returned objects, as well as in any updated objects produced by the downstream functions that operate on them.
- The column names from the testing input and output supplied to `validate()` and `design()` are retained in the relevant slots of the returned objects.

# dgpsi 2.5.0
- Training times for DGP emulators are now approximately 30%-40% faster.
Expand Down
30 changes: 16 additions & 14 deletions R/design.R
Original file line number Diff line number Diff line change
Expand Up @@ -1358,7 +1358,7 @@ design.dgp <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 200,
n_emulators <- length(object) - 1
if ( "id" %in% names(object) ) n_emulators <- n_emulators - 1
for ( k in 1:n_emulators ){
object[[paste('emulator',k,sep='')]] <- validate(object[[paste('emulator',k,sep='')]], x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE)
object[[paste('emulator',k,sep='')]] <- validate(object[[paste('emulator',k,sep='')]], x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE)
rmse[k] <- object[[paste('emulator',k,sep='')]]$oos$rmse
}
if ( verb ) message(" done")
Expand Down Expand Up @@ -1658,7 +1658,7 @@ design.dgp <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 200,
n_emulators <- length(object) - 1
if ( "id" %in% names(object) ) n_emulators <- n_emulators - 1
for ( k in 1:n_emulators ){
object[[paste('emulator',k,sep='')]] <- validate(object[[paste('emulator',k,sep='')]], x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE)
object[[paste('emulator',k,sep='')]] <- validate(object[[paste('emulator',k,sep='')]], x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE)
rmse[k] <- object[[paste('emulator',k,sep='')]]$oos$rmse
}
if ( verb ) message(" done")
Expand Down Expand Up @@ -1965,12 +1965,12 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 20
} else {
type <- 'oos'
if ( inherits(obj_k,"gp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val)
object[[paste('emulator',k,sep='')]] <- obj_k
rmse <- c(rmse, obj_k$oos$rmse)
}
if ( inherits(obj_k,"dgp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, cores = cores, ...)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, cores = cores, ...)
object[[paste('emulator',k,sep='')]] <- obj_k
if (is.categorical[k]) {
rmse <- c(rmse, obj_k$oos$log_loss)
Expand Down Expand Up @@ -2476,12 +2476,12 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 20
if ( N_acq_ind[nrow(N_acq_ind),k]!=0 ) {
obj_k <- object[[paste('emulator',k,sep='')]]
if ( inherits(obj_k,"gp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE)
object[[paste('emulator',k,sep='')]] <- obj_k
rmse[k] <- obj_k$oos$rmse
}
if ( inherits(obj_k,"dgp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE, cores = cores, ...)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE, cores = cores, ...)
object[[paste('emulator',k,sep='')]] <- obj_k
if (is.categorical[k]){
rmse[k] <- obj_k$oos$log_loss
Expand Down Expand Up @@ -2614,12 +2614,12 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 20
} else {
type <- 'oos'
if ( inherits(obj_k,"gp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val)
object[[paste('emulator',k,sep='')]] <- obj_k
rmse <- c(rmse, obj_k$oos$rmse)
}
if ( inherits(obj_k,"dgp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, cores = cores, ...)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, cores = cores, ...)
object[[paste('emulator',k,sep='')]] <- obj_k
if (is.categorical[k]){
rmse <- c(rmse, obj_k$oos$log_loss)
Expand Down Expand Up @@ -2923,12 +2923,12 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 20
if ( is.null(target) ) {
obj_k <- object[[paste('emulator',k,sep='')]]
if ( inherits(obj_k,"gp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE)
object[[paste('emulator',k,sep='')]] <- obj_k
rmse[k] <- obj_k$oos$rmse
}
if ( inherits(obj_k,"dgp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE, cores = cores, ...)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE, cores = cores, ...)
object[[paste('emulator',k,sep='')]] <- obj_k
if (is.categorical[k]){
rmse[k] <- obj_k$oos$log_loss
Expand All @@ -2940,12 +2940,12 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_sample = 20
if ( !istarget[k] ){
obj_k <- object[[paste('emulator',k,sep='')]]
if ( inherits(obj_k,"gp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE)
object[[paste('emulator',k,sep='')]] <- obj_k
rmse[k] <- obj_k$oos$rmse
}
if ( inherits(obj_k,"dgp") ) {
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k], verb = FALSE, M = M_val, force = TRUE, cores = cores, ...)
obj_k <- validate(obj_k, x_test = x_test, y_test = y_test[,k,drop=F], verb = FALSE, M = M_val, force = TRUE, cores = cores, ...)
object[[paste('emulator',k,sep='')]] <- obj_k
if (is.categorical[k]){
rmse[k] <- obj_k$oos$log_loss
Expand Down Expand Up @@ -3154,8 +3154,8 @@ extract_all <- function(X ,x_cand){

#check argument x_test and y_test
check_xy_test <- function(x_test, y_test, n_dim_X, n_dim_Y){
x_test <- unname(x_test)
y_test <- unname(y_test)
#x_test <- unname(x_test)
#y_test <- unname(y_test)
if ( !is.matrix(x_test)&!is.vector(x_test) ) stop("'x_test' must be a vector or a matrix.", call. = FALSE)
if ( !is.matrix(y_test)&!is.vector(y_test) ) stop("'y_test' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_test) ) {
Expand All @@ -3173,6 +3173,8 @@ check_xy_test <- function(x_test, y_test, n_dim_X, n_dim_Y){
}
}
if ( nrow(x_test)!=nrow(y_test) ) stop("'x_test' and 'y_test' have different number of data points.", call. = FALSE)
rownames(x_test) <- NULL
rownames(y_test) <- NULL
return(list(x_test, y_test))
}

Expand Down
6 changes: 4 additions & 2 deletions R/dgp.R
Original file line number Diff line number Diff line change
Expand Up @@ -626,8 +626,10 @@ dgp <- function(X, Y, depth = 2, node = ncol(X), name = 'sexp', lengthscale = 1.

res <- list()
res[['id']] <- if (is.null(id)) uuid::UUIDgenerate() else id
res[['data']][['X']] <- unname(X)
res[['data']][['Y']] <- unname(Y)
rownames(X) <- NULL
rownames(Y) <- NULL
res[['data']][['X']] <- X
res[['data']][['Y']] <- Y
res[['specs']] <- extract_specs(est_obj, "dgp")
res[['specs']][['internal_dims']] <- if( is.null(internal_input_idx) ) 1:n_dim_X else as.integer(reticulate::py_to_r(internal_input_idx)+1)
res[['specs']][['external_dims']] <- if( is.null(internal_input_idx) ) FALSE else as.integer(reticulate::py_to_r(external_input_idx)+1)
Expand Down
6 changes: 4 additions & 2 deletions R/gp.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,10 @@ gp <- function(X, Y, name = 'sexp', lengthscale = rep(0.1, ncol(X)), bounds = NU

res <- list()
res[['id']] <- if (is.null(id)) uuid::UUIDgenerate() else id
res[['data']][['X']] <- unname(X)
res[['data']][['Y']] <- unname(Y)
rownames(X) <- NULL
rownames(Y) <- NULL
res[['data']][['X']] <- X
res[['data']][['Y']] <- Y
res[['specs']] <- extract_specs(obj, "gp")
res[['specs']][['internal_dims']] <- if( is.null(internal_input_idx) ) 1:n_dim_X else as.integer(reticulate::py_to_r(internal_input_idx)+1)
res[['specs']][['external_dims']] <- if( is.null(internal_input_idx) ) FALSE else as.integer(reticulate::py_to_r(external_input_idx)+1)
Expand Down
67 changes: 19 additions & 48 deletions R/serialization.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@
#'
#' @details See further examples and tutorials at <`r get_docs_url()`>.
#' @note Since the constructed emulators are 'python' objects, they cannot be directly exported to other R processes for parallel
#' processing in multi-session workers created through spawning. This function provides a solution by converting the emulators
#' into serialized objects, which can be restored using [deserialize()] for multi-session processing. Note that in forking,
#' serialization is generally not required.
#' processing. This function provides a solution by converting the emulators into serialized objects, which can be restored
#' using [deserialize()] for multi-process parallel implementation.
#' @examples
#' \dontrun{
#'
#' library(future)
#' library(future.apply)
#' library(parallel)
#' library(dgpsi)
#'
#' # model
Expand All @@ -42,17 +40,25 @@
#' # serialize the DGP emulator
#' m_serialized <- serialize(m)
#'
#' # start a multi-session with three cores for parallel predictions
#' plan(multisession, workers = 3)
#' # create a cluster with 3 workers for parallel predictions
#' cl <- makeCluster(8)
#'
#' # export objects to the cluster
#' clusterExport(cl, varlist = c("m_serialized", "X_dgp"))
#'
#' # initialize deserialized object on each worker
#' res <- clusterEvalQ(cl, {
#' library(dgpsi)
#' assign("m_deserialized", deserialize(m_serialized), envir = .GlobalEnv)
#' })
#'
#' # perform parallel predictions
#' results <- future_lapply(1:length(X_dgp), function(i) {
#' m_deserialized <- deserialize(m_serialized)
#' results <- parLapply(cl, 1:length(X_dgp), function(i) {
#' mean_i <- predict(m_deserialized, X_dgp[i])$results$mean
#' }, future.seed = TRUE)
#' })
#'
#' # reset the future plan to sequential
#' plan(sequential)
#' # reset the cluster
#' stopCluster(cl)
#'
#' # combine mean predictions
#' pred_mean <- do.call(rbind, results)
Expand Down Expand Up @@ -118,42 +124,7 @@ serialize <- function(object, light = TRUE) {
#' @examples
#' \dontrun{
#'
#' library(future)
#' library(future.apply)
#' library(dgpsi)
#'
#' # model
#' f <- function(x) {
#' (sin(7.5*x)+1)/2
#' }
#'
#' # training data
#' X <- seq(0, 1, length = 10)
#' Y <- sapply(X, f)
#'
#' # train a DGP emulator
#' m <- dgp(X, Y, name = "matern2.5")
#'
#' # testing input data
#' X_dgp <- seq(0, 1, length = 100)
#'
#' # serialize the DGP emulator
#' m_serialized <- serialize(m)
#'
#' # start a multi-session with three cores for parallel predictions
#' plan(multisession, workers = 3)
#'
#' # perform parallel predictions
#' results <- future_lapply(1:length(X_dgp), function(i) {
#' m_deserialized <- deserialize(m_serialized)
#' mean_i <- predict(m_deserialized, X_dgp[i])$results$mean
#' }, future.seed = TRUE)
#'
#' # reset the future plan to sequential
#' plan(sequential)
#'
#' # combine mean predictions
#' pred_mean <- do.call(rbind, results)
#' # See serialize() for an example.
#' }
#' @md
#' @export
Expand Down
12 changes: 8 additions & 4 deletions R/update.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@ update.dgp <- function(object, X, Y, refit = TRUE, reset = FALSE, verb = TRUE, N

new_object <- list()
new_object[['id']] <- object$id
new_object[['data']][['X']] <- unname(X)
new_object[['data']][['Y']] <- unname(Y)
rownames(X) <- NULL
rownames(Y) <- NULL
new_object[['data']][['X']] <- X
new_object[['data']][['Y']] <- Y
new_object[['specs']] <- extract_specs(est_obj, "dgp")
new_object[['specs']][['internal_dims']] <- object[['specs']][['internal_dims']]
new_object[['specs']][['external_dims']] <- object[['specs']][['external_dims']]
Expand Down Expand Up @@ -217,8 +219,10 @@ update.gp <- function(object, X, Y, refit = TRUE, reset = FALSE, verb = TRUE, ..

new_object <- list()
new_object[['id']] <- object$id
new_object[['data']][['X']] <- unname(X)
new_object[['data']][['Y']] <- unname(Y)
rownames(X) <- NULL
rownames(Y) <- NULL
new_object[['data']][['X']] <- X
new_object[['data']][['Y']] <- Y
new_object[['specs']] <- extract_specs(constructor_obj_cp, "gp")
new_object[['specs']][['internal_dims']] <- object[['specs']][['internal_dims']]
new_object[['specs']][['external_dims']] <- object[['specs']][['external_dims']]
Expand Down
4 changes: 2 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ pack <- function(..., id = NULL) {
if ( !identical(res[[i]]$data$X, training_input) ) stop("The function can only pack emulators with common training input data.", call. = FALSE)
Y_dim <- ncol(res[[i]]$data$Y)
if ( Y_dim!=1 ) stop(sprintf("The function is only applicable to emulators with 1D output. Your emulator %i has %i output dimensions.", i, Y_dim), call. = FALSE)
X_all[[paste('emulator', i ,sep="")]] <- unname(training_input)
Y_all[[paste('emulator', i ,sep="")]] <- unname(res[[i]]$data$Y)
X_all[[paste('emulator', i ,sep="")]] <- training_input
Y_all[[paste('emulator', i ,sep="")]] <- res[[i]]$data$Y
names(res)[i] <- paste('emulator', i, sep="")
}
res[['id']] <- if (is.null(id)) uuid::UUIDgenerate() else id
Expand Down
Loading

0 comments on commit 16501d5

Please sign in to comment.