From 730849ccb1b8b82d0ad46f90907c09063a1f2454 Mon Sep 17 00:00:00 2001 From: Deyu Ming Date: Thu, 14 Nov 2024 23:36:48 +0000 Subject: [PATCH] Vignettes updates Vignettes updates --- .Rbuildignore | 3 ++- R/utils.R | 2 +- README.md | 2 +- man/summary.Rd | 2 +- vignettes/classification.Rmd | 4 ++-- vignettes/dgpsi.Rmd | 6 +++--- vignettes/large_scale_emulation.Rmd | 4 ++-- vignettes/linked_DGP.Rmd | 16 ++++++++-------- vignettes/motorcycle.Rmd | 6 +++--- vignettes/seq_design.Rmd | 8 ++++---- vignettes/seq_design_2.Rmd | 6 +++--- 11 files changed, 30 insertions(+), 29 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 440c43f..c3c6726 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,5 +9,6 @@ ^\.gitignore$ ^CRAN-SUBMISSION$ ^RESEARCH-NOTICE\.md$ -^vignettes/images/.*\.html$ +^vignettes/images/ ^vignettes/motorcycle.Rmd$ +^man/figures/dgpsi_logo.png$ diff --git a/R/utils.R b/R/utils.R index d23f895..c0e3ce2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -575,7 +575,7 @@ read <- function(pkl_file) { #' If set to `"table"`, the function returns a summary in table. If set to `"plot"`, the function #' returns an interactive visualization. Defaults to `"plot"`. If the `object` was created with #' `lgp()` where `struc` is not a data frame, `type` will automatically default to `"table"`. -#' @param group_size an integer secifying the number of consecutive layers to be grouped together +#' @param group_size an integer specifying the number of consecutive layers to be grouped together #' in the interactive visualization of linked emulators when `type = "plot"`. #' This argument is only applicable if `object` is an instance of the `lgp` class. #' Defaults to `1`. diff --git a/README.md b/README.md index d33046a..c4d7159 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# dgpsi +# dgpsi [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/dgpsi)](https://CRAN.R-project.org/package=dgpsi) [![Download](https://cranlogs.r-pkg.org/badges/grand-total/dgpsi?color=brightgreen)](https://CRAN.R-project.org/package=dgpsi) diff --git a/man/summary.Rd b/man/summary.Rd index 1f354e8..7537a1f 100644 --- a/man/summary.Rd +++ b/man/summary.Rd @@ -28,7 +28,7 @@ returns an interactive visualization. Defaults to \code{"plot"}. If the \code{ob \item{...}{N/A.} -\item{group_size}{an integer secifying the number of consecutive layers to be grouped together +\item{group_size}{an integer specifying the number of consecutive layers to be grouped together in the interactive visualization of linked emulators when \code{type = "plot"}. This argument is only applicable if \code{object} is an instance of the \code{lgp} class. Defaults to \code{1}.} diff --git a/vignettes/classification.Rmd b/vignettes/classification.Rmd index 8c39df2..40c15a1 100644 --- a/vignettes/classification.Rmd +++ b/vignettes/classification.Rmd @@ -107,7 +107,7 @@ plot(m_dgp, X_test, Y_test) ## Plotting ... done ``` -![](images/classifier_oos_1.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/classifier_oos_1.png){width=90%} By default, `plot()` displays true labels against predicted label proportions at each input position. Alternatively, setting `style = 2` in `plot()` generates a confusion matrix: @@ -121,7 +121,7 @@ plot(m_dgp, X_test, Y_test, style = 2) ## Plotting ... done ``` -![](images/classifier_oos_2.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/classifier_oos_2.png){width=90%} ### References diff --git a/vignettes/dgpsi.Rmd b/vignettes/dgpsi.Rmd index 3175699..0a43dca 100644 --- a/vignettes/dgpsi.Rmd +++ b/vignettes/dgpsi.Rmd @@ -92,7 +92,7 @@ plot(m) ## Plotting ... done ``` -![](images/step_fct_loo.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/step_fct_loo.png){width=90%} The second validation plot is the Out-Of-Sample (OOS) validation plot that requires an out-of-sample testing data set. Here we generate an OOS data set that contains 10 testing data points @@ -113,7 +113,7 @@ plot(m,oos_x,oos_y) ## Plotting ... done ``` -![](images/step_fct_oos.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/step_fct_oos.png){width=90%} ## Prediction @@ -146,4 +146,4 @@ lines(test_x, mu, type = 'l', lty = 2, lwd = 1.5, col = 'black') lines(X, Y, type = 'p', pch = 16, cex = 1, col = "#0072B2") # Training data points ``` -![](images/step_fct_result.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/step_fct_result.png){width=90%} diff --git a/vignettes/large_scale_emulation.Rmd b/vignettes/large_scale_emulation.Rmd index 5b4963c..0e329ec 100644 --- a/vignettes/large_scale_emulation.Rmd +++ b/vignettes/large_scale_emulation.Rmd @@ -83,7 +83,7 @@ plot(m) ## Plotting ... done ``` -![](images/vecchia_loo.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/vecchia_loo.png){width=90%} or the Out-Of-Sample (OOS) validation plot over `1000` randomly generated testing locations: @@ -99,7 +99,7 @@ plot(m, oos_x, oos_y) ## Plotting ... done ``` -![](images/vecchia_oos.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/vecchia_oos.png){width=90%} ### Performance tip diff --git a/vignettes/linked_DGP.Rmd b/vignettes/linked_DGP.Rmd index 4ad362c..ec9fea5 100644 --- a/vignettes/linked_DGP.Rmd +++ b/vignettes/linked_DGP.Rmd @@ -30,7 +30,7 @@ library(dgpsi) We consider the following synthetic system -![](images/linked_model.png){width="60%"} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/linked_model.png){width="60%"} that involves three models defined by: @@ -99,7 +99,7 @@ plot(m1) ## Plotting ... done ``` -![](images/model_1_loo.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/model_1_loo.png){width=90%} ## Emulation of Model 2 @@ -135,7 +135,7 @@ Post-processing LOO results ... done Plotting ... done ``` -![](images/model_2_loo.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/model_2_loo.png){width=90%} ## Emulation of Model 3 @@ -165,7 +165,7 @@ Post-processing LOO results ... done Plotting ... done ``` -![](images/model_3_loo.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/model_3_loo.png){width=90%} ## Emulation of Linked Model @@ -256,7 +256,7 @@ plot(m_gp, x_test = test_x, y_test = test_y, type = 'line', verb = F) + plot_annotation(title = 'GP Emulator', theme = theme(plot.title = element_text(hjust = 0.5))) ``` -![](images/linked_result.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/linked_result.png){width=90%} ```{r} # Linked emulator @@ -264,7 +264,7 @@ plot(m_link, x_test = test_x, y_test = test_y, type = 'line', verb = F) + plot_annotation(title = 'Linked Emulator', theme = theme(plot.title = element_text(hjust = 0.5))) ``` -![](images/linked_result_new.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/linked_result_new.png){width=90%} It can be seen from the above plots that the linked emulator outperforms the GP emulator with significantly better mean predictions and predictive uncertainties. @@ -289,7 +289,7 @@ plot(m_gp, test_x_oos, test_y_oos, style = 2) ## Plotting ... done ``` -![](images/gp_oos.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/gp_oos.png){width=90%} and the linked emulator: @@ -303,6 +303,6 @@ plot(m_link, test_x_oos, test_y_oos, style = 2) ## Plotting ... done ``` -![](images/linked_oos.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/linked_oos.png){width=90%} which show that the linked emulator outperforms the GP emulator with significantly better predictive accuracy and lower NRMSE. diff --git a/vignettes/motorcycle.Rmd b/vignettes/motorcycle.Rmd index 290a808..bf62d5c 100644 --- a/vignettes/motorcycle.Rmd +++ b/vignettes/motorcycle.Rmd @@ -49,7 +49,7 @@ and plot them: plot(X, Y, pch = 16, cex = 1, xlab = 'Time', ylab = 'Acceleration', cex.axis = 1.3, cex.lab = 1.3) ``` -![](images/motorcycle_data.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/motorcycle_data.png){width=90%} Before constructing an emulator, we first specify a seed with `set_seed()` from the package for reproducibility @@ -147,7 +147,7 @@ plot(m_gp, test_x, test_y) ## Plotting ... done ``` -![](images/motorcycle_gp_oos.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/motorcycle_gp_oos.png){width=90%} and for the DGP emulator: @@ -161,7 +161,7 @@ plot(m_dgp, test_x, test_y) ## Plotting ... done ``` -![](images/motorcycle_dgp_oos.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/motorcycle_dgp_oos.png){width=90%} Note that we still need to provide `test_x` and `test_y` to `plot()` even they have already been provided to `validate()`. Otherwise, `plot()` will draw the LOO cross validation plot. The visualizations above show that the DGP emulator gives a better performance than the GP emulator on modeling the heteroskedastic noises embedded in the underlying data set, even though they have quite similar NRMSEs. diff --git a/vignettes/seq_design.Rmd b/vignettes/seq_design.Rmd index edc346f..06043ab 100644 --- a/vignettes/seq_design.Rmd +++ b/vignettes/seq_design.Rmd @@ -49,7 +49,7 @@ ggplot(dat, aes(x1, x2, fill = f)) + geom_tile() + scale_fill_continuous(type = "viridis") ``` -![](images/seq_fct.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq_fct.png){width=90%} We can see from the figure above that the synthetic simulator exhibits more fluctuations on the bottom left of its input space while in the top-right part the simulator shows little variations. @@ -193,14 +193,14 @@ After the sequential design is done, we can inspect the enriched design by apply draw(m, 'design') ``` -![](images/seq_design.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq_design.png){width=90%} It can be seen from the figure above that most of the added design points concentrate at the bottom-left corner of the input space where the simulator `f` exhibits more variations and thus needs more data to be well-emulated. We can also visualize the changes of qualities (in terms of RMSEs wrt the validation dataset) of emulators constructed during the three waves of sequential designs: ```{r} draw(m, 'rmse') ``` -![](images/seq_rmse.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq_rmse.png){width=90%} ## Comparison to DGP emulators with space-filling designs @@ -271,7 +271,7 @@ draw(m, 'rmse', log = T) + scale_shape_manual(values = c(2, 3, 4, 8, 15)) ``` -![](images/seq_comparison.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq_comparison.png){width=90%} It can be seen from the plot above that with static space-filling designs, the quality of an emulator may not be improved as the design size increases. This is because increasing the size of a space-filling design may not capture regions where the simulator exhibits more variations, and thus cause DGP emulators with higher RMSEs than those constructed through the sequential design. diff --git a/vignettes/seq_design_2.Rmd b/vignettes/seq_design_2.Rmd index a433d18..07f6aca 100644 --- a/vignettes/seq_design_2.Rmd +++ b/vignettes/seq_design_2.Rmd @@ -54,7 +54,7 @@ p3 <- ggplot(data = output3, aes(x = x, y = y)) + geom_line(color = 'green4') + wrap_plots(list(p1, p2, p3)) + plot_annotation(title = 'Synthetic Simulator') ``` -![](images/seq2_fct.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq2_fct.png){width=90%} We now specify a seed with `set_seed()` from the package for reproducibility @@ -241,7 +241,7 @@ draw(m, 1, 'design') + draw(m, 2, 'design') + draw(m, 3, 'design') + plot_layout() & theme(legend.position = 'bottom') ``` -![](images/seq2_design.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq2_design.png){width=90%} The figure above shows that for the first emulator most of the design points are added below 0.5 whilst for the second emulator most of the design points concentrate around 0.5. For the third emulator, the resulting design is space-filling. It can be seen that these design point distributions are consistent with the functional complexities of the three outputs. @@ -311,7 +311,7 @@ p3 <- draw(m, emulator = 3, type = 'rmse', log = T) + p1 + p2 + p3 + plot_layout(guides = 'collect') & theme(legend.position = 'bottom') ``` -![](images/seq2_rmse.png){width=90%} +![](https://raw.githubusercontent.com/mingdeyu/dgpsi-R/master/vignettes/images/seq2_rmse.png){width=90%} It can be seen from the plot above that with the sequential design, emulators in the bundle can achieve higher or similar accuracy with smaller number of design points.