From ee252611e13067003cdd2492b048151d164277b0 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Mon, 24 Jun 2024 21:21:29 +0200
Subject: [PATCH 01/16] Updated boruta documentation

---
 DESCRIPTION    |  2 +-
 NAMESPACE      |  1 +
 R/boruta.R     | 20 ++++++++++++++++++++
 man/boruta.Rd  | 39 +++++++++++++++++++++++++++++++++++++++
 man/toTable.Rd |  4 ++--
 semtree.Rproj  |  1 +
 6 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 man/boruta.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 97dcece..3486b46 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -55,7 +55,7 @@ Encoding: UTF-8
 LazyLoad: yes
 Version: 0.9.19
 Date: 2023-03-03
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
 BugReports: https://github.com/brandmaier/semtree/issues
 URL: https://github.com/brandmaier/semtree
diff --git a/NAMESPACE b/NAMESPACE
index bc263c8..db0d57e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,6 +32,7 @@ S3method(strip,semtree)
 S3method(summary,semforest)
 S3method(summary,semtree)
 export(biodiversity)
+export(boruta)
 export(diversityMatrix)
 export(evaluateTree)
 export(fitSubmodels)
diff --git a/R/boruta.R b/R/boruta.R
index 8ba9da3..5b6cb03 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -1,3 +1,23 @@
+
+#' Run the Boruta algorithm on a sem tree
+#' 
+#' Grows a series of SEM Forests following the boruta algorithm to determine
+#'    feature importance as moderators of the underlying model.
+#' 
+#' 
+#' @aliases boruta plot.boruta print.boruta
+#' @param model A template SEM. Same as in \code{semtree}.
+#' @param data A dataframe to boruta on. Same as in \code{semtree}.
+#' @param control A semforest control object to set forest parameters.
+#' @param predictors An optional list of covariates. See semtree code example.
+#' @param constraints An optional list of covariates. See semtree code example.
+#' @param \dots Optional parameters.
+#' @return A boruta object.
+#' @author Priyanka Paul, Timothy R. Brick, Andreas Brandmaier
+#' @seealso \code{\link{semtree}} \code{\link{semforest}}
+#' 
+#' @keywords tree models multivariate
+#' @export
 boruta <- function(model,
                    data,
                    control = NULL,
diff --git a/man/boruta.Rd b/man/boruta.Rd
new file mode 100644
index 0000000..4921d25
--- /dev/null
+++ b/man/boruta.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/boruta.R
+\name{boruta}
+\alias{boruta}
+\alias{plot.boruta}
+\alias{print.boruta}
+\title{Run the Boruta algorithm on a sem tree}
+\usage{
+boruta(model, data, control = NULL, predictors = NULL, ...)
+}
+\arguments{
+\item{model}{A template SEM. Same as in \code{semtree}.}
+
+\item{data}{A dataframe to boruta on. Same as in \code{semtree}.}
+
+\item{control}{A semforest control object to set forest parameters.}
+
+\item{predictors}{An optional list of covariates. See semtree code example.}
+
+\item{\dots}{Optional parameters.}
+
+\item{constraints}{An optional list of covariates. See semtree code example.}
+}
+\value{
+A boruta object.
+}
+\description{
+Grows a series of SEM Forests following the boruta algorithm to determine
+   feature importance as moderators of the underlying model.
+}
+\seealso{
+\code{\link{semtree}} \code{\link{semforest}}
+}
+\author{
+Priyanka Paul, Timothy R. Brick, Andreas Brandmaier
+}
+\keyword{models}
+\keyword{multivariate}
+\keyword{tree}
diff --git a/man/toTable.Rd b/man/toTable.Rd
index 4fa608e..bd92d95 100644
--- a/man/toTable.Rd
+++ b/man/toTable.Rd
@@ -9,9 +9,9 @@ toTable(tree, added.param.cols = NULL, round.param = NULL)
 \arguments{
 \item{tree}{A SEM Tree object.}
 
-\item{added.param.cols}{Add extra columns with parameter estimates.}
+\item{added.param.cols}{String. Add extra columns with parameter estimates. Pass a vector with the names of the parameters that should be rendered in the table.}
 
-\item{round.param}{Number of digits to round parameter estiamtes}
+\item{round.param}{Integer. Number of digits to round parameter estimates. Default is no rounding (NULL)}
 }
 \description{
 Converts a tree into a tabular representation. This may be useful as a
diff --git a/semtree.Rproj b/semtree.Rproj
index 21a4da0..eaa6b81 100644
--- a/semtree.Rproj
+++ b/semtree.Rproj
@@ -15,3 +15,4 @@ LaTeX: pdfLaTeX
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace

From da0cfe4ac32646a126bc6132614896fa351274d8 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Mon, 24 Jun 2024 21:22:58 +0200
Subject: [PATCH 02/16] Added simple boruta test

---
 tests/borutaOneIndicator.R | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 tests/borutaOneIndicator.R

diff --git a/tests/borutaOneIndicator.R b/tests/borutaOneIndicator.R
new file mode 100644
index 0000000..be365a3
--- /dev/null
+++ b/tests/borutaOneIndicator.R
@@ -0,0 +1,20 @@
+# Tests a simple boruta model on a basic one-indicator SEM
+
+library(OpenMx)
+
+N <- 1000
+influence <- c(1, 0, 0, 0)
+nPred <- length(influence)
+
+
+genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
+                    mxPath(paste0("X", 1:nPred), "Y", values=influence),
+                    mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
+simpleData <- mxGenerateData(genModel, 1000)
+
+testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y", 
+                     mxPath("Y", arrows=2, values=1, free=TRUE),
+                     mxPath("one", "Y", values=0, free=TRUE), 
+                     mxData(simpleData, type="raw"))
+
+boruta(testModel, simpleData, verbose=TRUE)
\ No newline at end of file

From 56695bb311ac3712b96509d60c3ad685dfbc3d2a Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Mon, 24 Jun 2024 21:58:19 +0200
Subject: [PATCH 03/16] Minor updates to simplify the boruta test.

---
 tests/borutaOneIndicator.R | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/borutaOneIndicator.R b/tests/borutaOneIndicator.R
index be365a3..b159120 100644
--- a/tests/borutaOneIndicator.R
+++ b/tests/borutaOneIndicator.R
@@ -3,18 +3,23 @@
 library(OpenMx)
 
 N <- 1000
-influence <- c(1, 0, 0, 0)
+influence <- c(1, 0)
 nPred <- length(influence)
+cutbreaks <- 3
 
 
 genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
                     mxPath(paste0("X", 1:nPred), "Y", values=influence),
                     mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
 simpleData <- mxGenerateData(genModel, 1000)
+for(i in paste0("X", 1:nPred)) {
+  simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+}
 
 testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y", 
                      mxPath("Y", arrows=2, values=1, free=TRUE),
                      mxPath("one", "Y", values=0, free=TRUE), 
                      mxData(simpleData, type="raw"))
 
-boruta(testModel, simpleData, verbose=TRUE)
\ No newline at end of file
+output <- boruta(testModel, simpleData, verbose=TRUE)
+output
\ No newline at end of file

From 47f18e11d5c000b5d65260c79819e7362c009e91 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Mon, 24 Jun 2024 22:00:52 +0200
Subject: [PATCH 04/16] Fixed a bug in borutaOneIndicator test

---
 tests/borutaOneIndicator.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/borutaOneIndicator.R b/tests/borutaOneIndicator.R
index b159120..52b73ca 100644
--- a/tests/borutaOneIndicator.R
+++ b/tests/borutaOneIndicator.R
@@ -11,7 +11,7 @@ cutbreaks <- 3
 genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
                     mxPath(paste0("X", 1:nPred), "Y", values=influence),
                     mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
-simpleData <- mxGenerateData(genModel, 1000)
+simpleData <- mxGenerateData(genModel, N)
 for(i in paste0("X", 1:nPred)) {
   simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
 }

From 6a705eaa475362146b6843954bbe3074c735c43d Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Mon, 1 Jul 2024 15:52:33 +0200
Subject: [PATCH 05/16] TODO notes on the meeting

---
 R/boruta.R | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/R/boruta.R b/R/boruta.R
index 5b6cb03..a2d44df 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -40,6 +40,8 @@ boruta <- function(model,
     ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
   }
   
+  # TODO: Loop over runs has to start here
+  
   # stage 1 - create shadow features
 
   shadow.ids <- (ncol(data)+1):(ncol(data)+length(covariate.ids))
@@ -66,11 +68,31 @@ boruta <- function(model,
   max_shadow_importance <- max(agvim[names(agvim)%in%shadow_names])  
   agvim_filtered <- agvim[!(names(agvim)%in%shadow_names)]
   
+  # TODO: Apply binomial test across feature importance values:
+  # Code below from the Boruta package:
+  # decReg is the decision registry: one of "Tentative" "Accept" or "Reject"
+  # hitReg is a list the length of all features.
+  #     hitReg starts at all zeros, and increments each run that a given feature has
+  #     an importance greater than shadowmax.
+  # toAccept<-stats::p.adjust(stats::pbinom(hitReg-1,runs,0.5,lower.tail=FALSE),method=pAdjMethod)<pValue
+  # (decReg=="Tentative" & toAccept)->toAccept
+  # toReject<-stats::p.adjust(stats::pbinom(hitReg,runs,0.5,lower.tail=TRUE),method=pAdjMethod)<pValue
+  # (decReg=="Tentative" & toReject)->toReject
+  #
+  # The biasing here means that there are no decisions without correction before 5 runs
+  #   and no decisions with Bonferroni before 7 runs.
+  
+  # TODO: Parallelize the first five runs.
+  
   df<-data.frame(importance=agvim_filtered, predictor=names(agvim_filtered))
   
-  vim$filter <- agvim_filtered>max_shadow_importance
+  # TODO: track importance history
+  # vim$importanceHistory <- 
+  vim$filter <- agvim_filtered>max_shadow_importance  # Turns into hitreg
   vim$boruta <- TRUE
   vim$boruta_threshold = max_shadow_importance
   
+  # TODO: Loop ends here with some reporting.
+  
   return(vim)
 }

From f7bfe6d981fc7c810adff9bfec8863d6df83e981 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Wed, 3 Jul 2024 15:40:20 +0200
Subject: [PATCH 06/16] .gitignore .DS_Store (MacOS search index) files.

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 47281dc..7836fb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 inst/doc
 doc
 Meta
+.DS_Store

From 259814ba371f3466e99e692c9e12c363fbc9d8c5 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Wed, 3 Jul 2024 15:53:13 +0200
Subject: [PATCH 07/16] Converted to score-test splitting.

---
 tests/borutaOneIndicator.R | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/borutaOneIndicator.R b/tests/borutaOneIndicator.R
index 52b73ca..6979bfc 100644
--- a/tests/borutaOneIndicator.R
+++ b/tests/borutaOneIndicator.R
@@ -1,25 +1,32 @@
 # Tests a simple boruta model on a basic one-indicator SEM
 
 library(OpenMx)
+library(semtree)
 
 N <- 1000
 influence <- c(1, 0)
 nPred <- length(influence)
 cutbreaks <- 3
+categorical <- FALSE
 
 
 genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
                     mxPath(paste0("X", 1:nPred), "Y", values=influence),
                     mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
 simpleData <- mxGenerateData(genModel, N)
-for(i in paste0("X", 1:nPred)) {
-  simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+if(make_categorical) {
+  for(i in paste0("X", 1:nPred)) {
+    simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+    simpleData[[i]] <- mxFactor(simpleData[[i]], levels(simpleData[[i]]))
+  }
 }
 
 testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y", 
-                     mxPath("Y", arrows=2, values=1, free=TRUE),
-                     mxPath("one", "Y", values=0, free=TRUE), 
+                     mxPath("Y", arrows=2, values=1, free=TRUE, labels=c("Var")),
+                     mxPath("one", "Y", values=0, free=TRUE, labels=c("mu")), 
                      mxData(simpleData, type="raw"))
 
-output <- boruta(testModel, simpleData, verbose=TRUE)
-output
\ No newline at end of file
+control <- semforest.control(control=semtree.control(method="score", alpha=1, verbose=T))
+
+output <- boruta(testModel, simpleData, verbose=TRUE, control = control)
+output

From ae525a2c93aacc2e8687ff2db2fd8a5e0fa30ae1 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Wed, 3 Jul 2024 15:53:13 +0200
Subject: [PATCH 08/16] Converted to score-test splitting.

---
 tests/borutaOneIndicator.R | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/borutaOneIndicator.R b/tests/borutaOneIndicator.R
index 52b73ca..f8c03b1 100644
--- a/tests/borutaOneIndicator.R
+++ b/tests/borutaOneIndicator.R
@@ -1,25 +1,32 @@
 # Tests a simple boruta model on a basic one-indicator SEM
 
 library(OpenMx)
+library(semtree)
 
 N <- 1000
 influence <- c(1, 0)
 nPred <- length(influence)
 cutbreaks <- 3
+make_categorical <- FALSE
 
 
 genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
                     mxPath(paste0("X", 1:nPred), "Y", values=influence),
                     mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
 simpleData <- mxGenerateData(genModel, N)
-for(i in paste0("X", 1:nPred)) {
-  simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+if(make_categorical) {
+  for(i in paste0("X", 1:nPred)) {
+    simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+    simpleData[[i]] <- mxFactor(simpleData[[i]], levels(simpleData[[i]]))
+  }
 }
 
 testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y", 
-                     mxPath("Y", arrows=2, values=1, free=TRUE),
-                     mxPath("one", "Y", values=0, free=TRUE), 
+                     mxPath("Y", arrows=2, values=1, free=TRUE, labels=c("Var")),
+                     mxPath("one", "Y", values=0, free=TRUE, labels=c("mu")), 
                      mxData(simpleData, type="raw"))
 
-output <- boruta(testModel, simpleData, verbose=TRUE)
-output
\ No newline at end of file
+control <- semforest.control(control=semtree.control(method="score", alpha=1, verbose=T))
+
+output <- boruta(testModel, simpleData, verbose=TRUE, control = control)
+output

From 708541c5fbedf6c4ddfda972162580a5dfb07d02 Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Wed, 3 Jul 2024 23:59:05 +0200
Subject: [PATCH 09/16] Boruta test update: default to categorical; disable
 verbosity.

---
 tests/borutaOneIndicator.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/borutaOneIndicator.R b/tests/borutaOneIndicator.R
index f8c03b1..7e73b27 100644
--- a/tests/borutaOneIndicator.R
+++ b/tests/borutaOneIndicator.R
@@ -7,7 +7,7 @@ N <- 1000
 influence <- c(1, 0)
 nPred <- length(influence)
 cutbreaks <- 3
-make_categorical <- FALSE
+make_categorical <- TRUE
 
 
 genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
@@ -26,7 +26,7 @@ testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y",
                      mxPath("one", "Y", values=0, free=TRUE, labels=c("mu")), 
                      mxData(simpleData, type="raw"))
 
-control <- semforest.control(control=semtree.control(method="score", alpha=1, verbose=T))
+control <- semforest.control(control=semtree.control(method="score", alpha=1))
 
 output <- boruta(testModel, simpleData, verbose=TRUE, control = control)
 output

From 120e69f837c8b1eb9cb8a69ba5394cfba9c0e85f Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Wed, 3 Jul 2024 23:59:35 +0200
Subject: [PATCH 10/16] Added looping and binomial tests.

---
 R/boruta.R | 145 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 101 insertions(+), 44 deletions(-)

diff --git a/R/boruta.R b/R/boruta.R
index a2d44df..7ced65c 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -22,6 +22,10 @@ boruta <- function(model,
                    data,
                    control = NULL,
                    predictors = NULL,
+                   maxRuns = 30,
+                   pAdjMethod = "none",
+                   alpha = .05,
+                   verbose=FALSE,
                    ...) {
   
  
@@ -40,59 +44,112 @@ boruta <- function(model,
     ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
   }
   
-  # TODO: Loop over runs has to start here
+  # Checks on x & y from the boruta package
+  if(length(grep('^shadow',covariate.ids)>0)) 
+    stop('Attributes with names starting from "shadow" are reserved for internal use. Please rename them.')
+  if(maxRuns<11)
+    stop('maxRuns must be greater than 10.')
+  if(!pAdjMethod %in% stats::p.adjust.methods) 
+    stop(c('P-value adjustment method not found. Must be one of:', 
+           stats::p.adjust.methods))
   
-  # stage 1 - create shadow features
-
-  shadow.ids <- (ncol(data)+1):(ncol(data)+length(covariate.ids))
-  
-  for (cur_cov_id in covariate.ids) {
-    # pick column and shuffle
-    temp_column <- data[, cur_cov_id]
-    temp_column <- sample(temp_column, length(temp_column), replace=FALSE)
-    # add to dataset as shadow feature
-    temp_colname <- paste0("shadow_", names(data)[cur_cov_id],collapse="")
-    data[temp_colname] <- temp_column
-    if (!is.null(predictors)) predictors <- c(predictors, temp_colname)
+  # Might clash with some other semtrees stuff
+  if(is.null(predictors)) {
+    predictors <- names(data)[covariate.ids]
   }
   
-  # run the forest
-  forest <- semforest(model, data, control, predictors, ...)
-  
-  # run variable importance
-  vim <- varimp(forest)
-  
-  # get variable importance from shadow features
-  shadow_names <- names(data)[shadow.ids]
-  agvim <- aggregateVarimp(vim, aggregate="mean")
-  max_shadow_importance <- max(agvim[names(agvim)%in%shadow_names])  
-  agvim_filtered <- agvim[!(names(agvim)%in%shadow_names)]
-  
-  # TODO: Apply binomial test across feature importance values:
-  # Code below from the Boruta package:
-  # decReg is the decision registry: one of "Tentative" "Accept" or "Reject"
-  # hitReg is a list the length of all features.
-  #     hitReg starts at all zeros, and increments each run that a given feature has
-  #     an importance greater than shadowmax.
-  # toAccept<-stats::p.adjust(stats::pbinom(hitReg-1,runs,0.5,lower.tail=FALSE),method=pAdjMethod)<pValue
-  # (decReg=="Tentative" & toAccept)->toAccept
-  # toReject<-stats::p.adjust(stats::pbinom(hitReg,runs,0.5,lower.tail=TRUE),method=pAdjMethod)<pValue
-  # (decReg=="Tentative" & toReject)->toReject
-  #
-  # The biasing here means that there are no decisions without correction before 5 runs
-  #   and no decisions with Bonferroni before 7 runs.
-  
+  # Initialize and then loop over runs:
+  impHistory <- data.frame(matrix(NA, nrow=0, ncol=length(predictors)+3))
+  names(impHistory) <- c(predictors, "shadowMin", "shadowMean", "shadowMax")
+  decisionList <- data.frame(predictor=predictors, decision = "Tentative",
+                             importance = NA, hitCount = 0, raw.p=NA, adjusted.p=NA)
+
   # TODO: Parallelize the first five runs.
+  for(runNo in 1:maxRuns) {
+    if(verbose) {
+      message(paste("Beginning Run", runNo))
+    }
+    
+    # stage 1 - create shadow features
+    rejected <- decisionList$predictor[decisionList$decision == "Rejected"]
+    current.predictors <- setdiff(predictors, rejected)
+    current.covariate.ids <- setdiff(covariate.ids, names(data) %in% rejected)
+    current.data <- data[, setdiff(names(data), rejected)]
+    
+    shadow.ids <- (ncol(current.data)+1):(ncol(current.data)+length(current.covariate.ids))
+    
+    for (cur_cov_id in current.covariate.ids) {
+      # pick column and shuffle
+      temp_column <- current.data[, cur_cov_id]
+      temp_column <- sample(temp_column, length(temp_column), replace=FALSE)
+      # add to dataset as shadow feature
+      temp_colname <- paste0("shadow_", names(current.data)[cur_cov_id],collapse="")
+      current.data[temp_colname] <- temp_column
+      if (!is.null(current.predictors)) current.predictors <- c(current.predictors, temp_colname)
+    }
+    
+    # TODO: Pre-run model if needed.
+    
+    # run the forest
+    forest <- semforest(model, current.data, control, current.predictors, ...)
+  
+    # run variable importance
+    vim <- varimp(forest)
   
-  df<-data.frame(importance=agvim_filtered, predictor=names(agvim_filtered))
+    # get variable importance from shadow features
+    shadow_names <- names(current.data)[shadow.ids]
+    agvim <- aggregateVarimp(vim, aggregate="mean")
+    
+    # Compute shadow stats
+    shadow_importances <- agvim[names(agvim)%in%shadow_names]
+    impHistory[runNo, "shadowMax"] <- max_shadow_importance <- max(shadow_importances)  
+    impHistory[runNo, "shadowMin"] <- min(shadow_importances)
+    impHistory[runNo, "shadowMean"] <- mean(shadow_importances)
+    agvim_filtered <- agvim[!(names(agvim)%in%shadow_names)]
+    impHistory[runNo, names(agvim_filtered)] <- agvim_filtered
+    
+    # Compute "hits" 
+    hits <- decisionList$predictor %in% names(agvim_filtered[agvim_filtered>max_shadow_importance])
+    decisionList$hitCount[hits] <- decisionList$hitCount[hits] + 1
+    
+    # Run tests. 
+    # The biasing here means that there are no decisions without correction 
+    #   before 5 runs and no decisions with Bonferroni before 7 runs.
+    
+    # Run confirmation tests (pulled from Boruta package)
+    newPs <- stats::pbinom(decisionList$hitCount-1,runNo,0.5,lower.tail=FALSE)
+    adjPs <- stats::p.adjust(newPs,method=pAdjMethod)
+    acceptable <- adjPs < alpha
+    updateList <- acceptable & decisionList$decision == "Tentative"
+    decisionList$raw.p[updateList] <- newPs[updateList]
+    decisionList$adjusted.p[updateList] <- adjPs[updateList]
+    decisionList$decision[updateList] <- "Confirmed"
+    
+    # Run rejection tests (pulled from Boruta package)
+    newPs <- stats::pbinom(decisionList$hitCount,runNo,0.5,lower.tail=TRUE)
+    adjPs <- stats::p.adjust(newPs,method=pAdjMethod)
+    acceptable <- adjPs < alpha
+    updateList <- acceptable & decisionList$decision == "Tentative"
+    decisionList$raw.p[updateList] <- newPs[updateList]
+    decisionList$adjusted.p[updateList] <- adjPs[updateList]
+    decisionList$decision[updateList] <- "Rejected"
+    
+    if(!any(decisionList$decision == "Tentative")) {
+      break
+    }
+    
+  }
   
-  # TODO: track importance history
-  # vim$importanceHistory <- 
-  vim$filter <- agvim_filtered>max_shadow_importance  # Turns into hitreg
+  vim$importance <- colMeans(impHistory, na.rm = TRUE)
+  vim$impHistory <- impHistory
+  vim$decisions <- decisionList$decision
+  vim$details <- decisionList
+
+  vim$filter <- decisionList$decision == "Confirmed"  # Turns into hitreg
   vim$boruta <- TRUE
-  vim$boruta_threshold = max_shadow_importance
   
   # TODO: Loop ends here with some reporting.
   
   return(vim)
 }
+

From 41af79067b6ff73a3bbb7dac6e4a7bc56c80ca7b Mon Sep 17 00:00:00 2001
From: trbrick <tbrick@psu.edu>
Date: Thu, 4 Jul 2024 00:12:24 +0200
Subject: [PATCH 11/16] Documentation improvements.

---
 R/boruta.R | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/R/boruta.R b/R/boruta.R
index 7ced65c..cfe0ff5 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -10,9 +10,19 @@
 #' @param data A dataframe to boruta on. Same as in \code{semtree}.
 #' @param control A semforest control object to set forest parameters.
 #' @param predictors An optional list of covariates. See semtree code example.
-#' @param constraints An optional list of covariates. See semtree code example.
-#' @param \dots Optional parameters.
-#' @return A boruta object.
+#' @param maxRuns Maximum number of boruta search cycles
+#' @param pAdjMethod A value from \link{stats::p.adjust.methods} defining a 
+#'          multiple testing correction method
+#' @param alpha p-value cutoff for decisionmaking. Default .05
+#' @param verbose Verbosity level for boruta processing 
+#'          similar to the same argument in \link{semtree.control} and 
+#'          \link{semforest.control}
+#' @param \dots Optional parameters to undefined subfunctions
+#' @return A vim object with several elements that need work. 
+#'          Of particular note, `$importance` carries mean importance; 
+#'          `$decision` denotes Accepted/Rejected/Tentative;
+#'          `$impHistory` has the entire varimp history; and
+#'          `$details` has exit values for each parameter.
 #' @author Priyanka Paul, Timothy R. Brick, Andreas Brandmaier
 #' @seealso \code{\link{semtree}} \code{\link{semforest}}
 #' 
@@ -62,7 +72,7 @@ boruta <- function(model,
   impHistory <- data.frame(matrix(NA, nrow=0, ncol=length(predictors)+3))
   names(impHistory) <- c(predictors, "shadowMin", "shadowMean", "shadowMax")
   decisionList <- data.frame(predictor=predictors, decision = "Tentative",
-                             importance = NA, hitCount = 0, raw.p=NA, adjusted.p=NA)
+                             hitCount = 0, raw.p=NA, adjusted.p=NA)
 
   # TODO: Parallelize the first five runs.
   for(runNo in 1:maxRuns) {

From 731bebbaaf6fcb080e256e01e94838975c847fc0 Mon Sep 17 00:00:00 2001
From: LeonieHagitte <leonie.hagitte@student.hu-berlin.de>
Date: Thu, 5 Sep 2024 14:35:20 +0200
Subject: [PATCH 12/16] improved plotting functionality for BORUTA fixed
 coercion problem in tables

---
 DESCRIPTION            |   3 +-
 R/boruta.R             | 102 ++++++++++++++++++-----------------------
 R/toTable.R            |   9 ++--
 man/boruta.Rd          |  31 +++++++++++--
 man/semtree.control.Rd |   4 +-
 5 files changed, 80 insertions(+), 69 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2c97d4f..7ccbd7f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,7 +26,8 @@ Imports:
     future.apply,
     data.table,
     expm,
-    gridBase
+    gridBase,
+    dplyr
 Suggests:
     knitr,
     rmarkdown,
diff --git a/R/boruta.R b/R/boruta.R
index 0ba714b..ec87a56 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -27,7 +27,6 @@
 #' 
 #' @keywords tree models multivariate
 #' @export
-
 boruta <- function(model,
                    data,
                    control = NULL,
@@ -38,63 +37,21 @@ boruta <- function(model,
                    verbose=FALSE,
                    ...) {
   
-  # initial checks
-  stopifnot(percentile_threshold>=0)
-  stopifnot(percentile_threshold<=1)
-  stopifnot(is.numeric(rounds))
-  stopifnot(rounds>0)
-  
-  preds_important <- c()
-  preds_unimportant <- c()
-
-  cur_round = 1
-  temp_vims <- list()
-   
-  while(cur_round <= rounds) {
-  vim_boruta <- .boruta(model=model,
-          data=data,
-          control=control,
-          predictors=predictors,
-          percentile_threshold = percentile_threshold,
-          ...) 
-  browser()
-  # add predictors to list of unimportant variables
-  preds_unimportant <- c(preds_unimportant, names(vim_boruta$filter)[!vim_boruta$filter])
-  # remove them from the dataset
-  data <- data[, -c(preds_unimportant)]
-  temp_vims[[cur_round]] <-vim_boruta
-  }
-  
-  result <- list(
-    preds_unimportant,
-    rounds = rounds
-  )
   
-  return(result)
-}
-
-.boruta <- function(model,
-                   data,
-                   control = NULL,
-                   predictors = NULL,
-                   percentile_threshold = 1,
-                   num_shadows = 1,
-                   ...) {
-  
-  # make sure that no column names start with "shadow_" prefix
-  stopifnot(all(sapply(names(data), function(x) {!startsWith(x, "shadow_")})))
-
   # detect model (warning: duplicated code)
-  if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) {
-    tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors)
-
-  } else if (inherits(model,"lavaan")){
+  if (inherits(model,"MxModel") || inherits(model,"MxRAMModel")) {
+    tmp <- getPredictorsOpenMx(mxmodel=model, dataset=data, covariates=predictors)
+    model.ids <- tmp[[1]]
+    covariate.ids <- tmp[[2]]
+    #  } else if (inherits(model,"lavaan")){
     
-    tmp <- getPredictorsLavaan(model, data, predictors)
+    # } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) {
+    #
   } else {
-    ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!")
+    ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
   }
-
+  
+  # initial checks
   # Checks on x & y from the boruta package
   if(length(grep('^shadow',covariate.ids)>0)) 
     stop('Attributes with names starting from "shadow" are reserved for internal use. Please rename them.')
@@ -114,7 +71,7 @@ boruta <- function(model,
   names(impHistory) <- c(predictors, "shadowMin", "shadowMean", "shadowMax")
   decisionList <- data.frame(predictor=predictors, decision = "Tentative",
                              hitCount = 0, raw.p=NA, adjusted.p=NA)
-
+  
   # TODO: Parallelize the first five runs.
   for(runNo in 1:maxRuns) {
     if(verbose) {
@@ -143,10 +100,10 @@ boruta <- function(model,
     
     # run the forest
     forest <- semforest(model, current.data, control, current.predictors, ...)
-  
+    
     # run variable importance
     vim <- varimp(forest)
-  
+    
     # get variable importance from shadow features
     shadow_names <- names(current.data)[shadow.ids]
     agvim <- aggregateVarimp(vim, aggregate="mean")
@@ -195,12 +152,43 @@ boruta <- function(model,
   vim$impHistory <- impHistory
   vim$decisions <- decisionList$decision
   vim$details <- decisionList
-
+  
   vim$filter <- decisionList$decision == "Confirmed"  # Turns into hitreg
   vim$boruta <- TRUE
   
+  class(vim) <- "boruta"
+  
   # TODO: Loop ends here with some reporting.
   
   return(vim)
 }
 
+#' @exportS3Method plot boruta
+plot.boruta = function(vim, type=0, ...) {
+  decisionList = vim$details
+  impHistory = vim$impHistory
+  impHistory <- impHistory |>
+    dplyr::mutate(rnd=1:nrow(impHistory)) |>
+    tidyr::pivot_longer(cols = -last_col()) |> #everything()) |> 
+    dplyr::left_join(data.frame(decisionList), by=join_by("name"=="predictor")) |>
+    dplyr::mutate(decision = case_when(is.na(decision)~"Shadow", .default=decision)) |>
+    dplyr::group_by(name) |> mutate(median_value = median(value,na.rm=TRUE))
+ 
+  if (type==0) {
+   ggplot2::ggplot(impHistory, 
+                  aes(x=stats::reorder(name, median_value), 
+                      y=value, color=decision)) + 
+    ggplot2::geom_boxplot()+
+    ggplot2::xlab("")+
+    ggplot2::ylab("Importance")+
+    scale_color_discrete(name = "Predictor")+
+    theme(axis.text.x = element_text(angle = 45, hjust = 1))
+  } else if (type==1) {
+    ggplot2::ggplot(impHistory,
+                    aes(x=rnd, y=value,group=name,col=name))+geom_line()+ geom_hline(aes(yintercept=median_value,col=name),lwd=2)+
+      xlab("Round")+ylab("Importance")+scale_color_discrete(name = "Predictor")
+  } else {
+    stop("Unknown graph type. Please choose 0 or 1.")
+  }
+  
+}
\ No newline at end of file
diff --git a/R/toTable.R b/R/toTable.R
index 337c235..2be2bf7 100644
--- a/R/toTable.R
+++ b/R/toTable.R
@@ -66,7 +66,6 @@ alls <- unique(alls)
 
 
 # create table
-#covariate.names <-simplify2array(tree$result$btn.matrix[2,])
 covariate.names <- getCovariatesFromTree(tree)
 
 # default is to display all parameters
@@ -79,10 +78,12 @@ all.names <- c(covariate.names, added.param.cols)
 
 str.matrix <- matrix(NA, nrow = length(rowdata),ncol=length(all.names))
 
+# convert to a data frame to avoid coercion to string
+str.matrix <- data.frame(str.matrix)
+
 colnames(str.matrix) <- all.names
 
 for (i in 1:length(rowdata)) {
-#  result.row <- rep(" ",length(covariate.names))
   myrow <- rowdata[[i]][[1]]
   for (j in 1:length(myrow)) {
     myitem <- myrow[[j]]
@@ -126,7 +127,6 @@ for (i in 1:length(rowdata)) {
   }
   }
   
-#  result.string <- paste(result.string,paste(result.row,collapse="\t"),"\n")
 }
 
 ## prune empty columns?
@@ -139,7 +139,7 @@ if (length(is.col.empty)>0) {
 sortby <- apply(str.matrix,2,function(x){sum(!is.na(x))})
 if (!is.null(added.param.cols)) {
  remids <- (dim(str.matrix)[2]-length(added.param.cols)+1):(dim(str.matrix)[2])
- sortby[remids] <- sortby[remids]-999999
+ sortby[remids] <- sortby[remids]-999999 # bad style ^^ #TODO
 }
 sort.ix <- sort(sortby,index.return=TRUE,decreasing = TRUE)$ix
 str.matrix <- str.matrix[, sort.ix]
@@ -156,7 +156,6 @@ str.matrix[is.na(str.matrix)]<-""
 ## and display
 #cat(result.string)
 
-#require("openxlsx")
 
 return(str.matrix)
 
diff --git a/man/boruta.Rd b/man/boruta.Rd
index 4921d25..6a8cc8a 100644
--- a/man/boruta.Rd
+++ b/man/boruta.Rd
@@ -6,7 +6,17 @@
 \alias{print.boruta}
 \title{Run the Boruta algorithm on a sem tree}
 \usage{
-boruta(model, data, control = NULL, predictors = NULL, ...)
+boruta(
+  model,
+  data,
+  control = NULL,
+  predictors = NULL,
+  maxRuns = 30,
+  pAdjMethod = "none",
+  alpha = 0.05,
+  verbose = FALSE,
+  ...
+)
 }
 \arguments{
 \item{model}{A template SEM. Same as in \code{semtree}.}
@@ -17,12 +27,25 @@ boruta(model, data, control = NULL, predictors = NULL, ...)
 
 \item{predictors}{An optional list of covariates. See semtree code example.}
 
-\item{\dots}{Optional parameters.}
+\item{maxRuns}{Maximum number of boruta search cycles}
 
-\item{constraints}{An optional list of covariates. See semtree code example.}
+\item{pAdjMethod}{A value from \link{stats::p.adjust.methods} defining a 
+multiple testing correction method}
+
+\item{alpha}{p-value cutoff for decisionmaking. Default .05}
+
+\item{verbose}{Verbosity level for boruta processing 
+similar to the same argument in \link{semtree.control} and 
+\link{semforest.control}}
+
+\item{\dots}{Optional parameters to undefined subfunctions}
 }
 \value{
-A boruta object.
+A vim object with several elements that need work. 
+         Of particular note, `$importance` carries mean importance; 
+         `$decision` denotes Accepted/Rejected/Tentative;
+         `$impHistory` has the entire varimp history; and
+         `$details` has exit values for each parameter.
 }
 \description{
 Grows a series of SEM Forests following the boruta algorithm to determine
diff --git a/man/semtree.control.Rd b/man/semtree.control.Rd
index 7ce8847..df9d886 100644
--- a/man/semtree.control.Rd
+++ b/man/semtree.control.Rd
@@ -8,7 +8,7 @@
 \usage{
 semtree.control(
   method = c("naive", "score", "fair", "fair3"),
-  min.N = 20,
+  min.N = NULL,
   max.depth = NA,
   alpha = 0.05,
   alpha.invariance = NA,
@@ -24,7 +24,7 @@ semtree.control(
   report.level = 0,
   exclude.code = NA,
   linear = TRUE,
-  min.bucket = 10,
+  min.bucket = NULL,
   naive.bonferroni.type = 0,
   missing = "ignore",
   use.maxlm = FALSE,

From eb271f431024275a01b151bb1665594f1d3e744c Mon Sep 17 00:00:00 2001
From: LeonieHagitte <leonie.hagitte@student.hu-berlin.de>
Date: Fri, 6 Sep 2024 10:27:55 +0200
Subject: [PATCH 13/16] extension of BORUTA

---
 NAMESPACE  |  1 +
 R/boruta.R | 22 +++++++++++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 0158412..bf7c43e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ S3method(merge,semforest)
 S3method(nobs,semtree)
 S3method(partialDependence,semforest)
 S3method(partialDependence,semforest_stripped)
+S3method(plot,boruta)
 S3method(plot,diversityMatrix)
 S3method(plot,partialDependence)
 S3method(plot,semforest.proximity)
diff --git a/R/boruta.R b/R/boruta.R
index ec87a56..71059c1 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -43,10 +43,10 @@ boruta <- function(model,
     tmp <- getPredictorsOpenMx(mxmodel=model, dataset=data, covariates=predictors)
     model.ids <- tmp[[1]]
     covariate.ids <- tmp[[2]]
-    #  } else if (inherits(model,"lavaan")){
-    
-    # } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) {
-    #
+  } else if (inherits(model,"lavaan")){
+    tmp <- getPredictorsLavaan(model, dataset=data, covariates=predictors)
+    model.ids <- tmp[[1]]
+    covariate.ids <- tmp[[2]]
   } else {
     ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
   }
@@ -172,7 +172,7 @@ plot.boruta = function(vim, type=0, ...) {
     tidyr::pivot_longer(cols = -last_col()) |> #everything()) |> 
     dplyr::left_join(data.frame(decisionList), by=join_by("name"=="predictor")) |>
     dplyr::mutate(decision = case_when(is.na(decision)~"Shadow", .default=decision)) |>
-    dplyr::group_by(name) |> mutate(median_value = median(value,na.rm=TRUE))
+    dplyr::group_by(name) |> dplyr::mutate(median_value = median(value,na.rm=TRUE))
  
   if (type==0) {
    ggplot2::ggplot(impHistory, 
@@ -181,12 +181,16 @@ plot.boruta = function(vim, type=0, ...) {
     ggplot2::geom_boxplot()+
     ggplot2::xlab("")+
     ggplot2::ylab("Importance")+
-    scale_color_discrete(name = "Predictor")+
-    theme(axis.text.x = element_text(angle = 45, hjust = 1))
+    ggplot2::scale_color_discrete(name = "Decision")+
+    ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1))
   } else if (type==1) {
     ggplot2::ggplot(impHistory,
-                    aes(x=rnd, y=value,group=name,col=name))+geom_line()+ geom_hline(aes(yintercept=median_value,col=name),lwd=2)+
-      xlab("Round")+ylab("Importance")+scale_color_discrete(name = "Predictor")
+                    ggplot2::aes(x=rnd, y=value,group=name,col=name))+
+      ggplot2::geom_line()+ 
+      ggplot2::geom_hline(aes(yintercept=median_value,col=name),lwd=2)+
+      ggplot2::xlab("Round")+
+      ggplot2::ylab("Importance")+
+      ggplot2::scale_color_discrete(name = "Predictor")
   } else {
     stop("Unknown graph type. Please choose 0 or 1.")
   }

From 222cb97f91c6444af8afce603d47517a01331c13 Mon Sep 17 00:00:00 2001
From: LeonieHagitte <leonie.hagitte@student.hu-berlin.de>
Date: Fri, 6 Sep 2024 10:42:38 +0200
Subject: [PATCH 14/16] added imports reformatting

---
 R/boruta.R | 186 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 113 insertions(+), 73 deletions(-)

diff --git a/R/boruta.R b/R/boruta.R
index 71059c1..066de45 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -1,30 +1,30 @@
 #' Run the Boruta algorithm on a sem tree
-#' 
+#'
 #' Grows a series of SEM Forests following the boruta algorithm to determine
 #'    feature importance as moderators of the underlying model.
-#' 
-#' 
+#'
+#'
 #' @aliases boruta plot.boruta print.boruta
 #' @param model A template SEM. Same as in \code{semtree}.
 #' @param data A dataframe to boruta on. Same as in \code{semtree}.
 #' @param control A semforest control object to set forest parameters.
 #' @param predictors An optional list of covariates. See semtree code example.
 #' @param maxRuns Maximum number of boruta search cycles
-#' @param pAdjMethod A value from \link{stats::p.adjust.methods} defining a 
+#' @param pAdjMethod A value from \link{stats::p.adjust.methods} defining a
 #'          multiple testing correction method
 #' @param alpha p-value cutoff for decisionmaking. Default .05
-#' @param verbose Verbosity level for boruta processing 
-#'          similar to the same argument in \link{semtree.control} and 
+#' @param verbose Verbosity level for boruta processing
+#'          similar to the same argument in \link{semtree.control} and
 #'          \link{semforest.control}
 #' @param \dots Optional parameters to undefined subfunctions
-#' @return A vim object with several elements that need work. 
-#'          Of particular note, `$importance` carries mean importance; 
+#' @return A vim object with several elements that need work.
+#'          Of particular note, `$importance` carries mean importance;
 #'          `$decision` denotes Accepted/Rejected/Tentative;
 #'          `$impHistory` has the entire varimp history; and
 #'          `$details` has exit values for each parameter.
 #' @author Priyanka Paul, Timothy R. Brick, Andreas Brandmaier
 #' @seealso \code{\link{semtree}} \code{\link{semforest}}
-#' 
+#'
 #' @keywords tree models multivariate
 #' @export
 boruta <- function(model,
@@ -34,99 +34,124 @@ boruta <- function(model,
                    maxRuns = 30,
                    pAdjMethod = "none",
                    alpha = .05,
-                   verbose=FALSE,
+                   verbose = FALSE,
                    ...) {
-  
-  
   # detect model (warning: duplicated code)
-  if (inherits(model,"MxModel") || inherits(model,"MxRAMModel")) {
-    tmp <- getPredictorsOpenMx(mxmodel=model, dataset=data, covariates=predictors)
+  if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) {
+    tmp <-
+      getPredictorsOpenMx(mxmodel = model,
+                          dataset = data,
+                          covariates = predictors)
     model.ids <- tmp[[1]]
     covariate.ids <- tmp[[2]]
-  } else if (inherits(model,"lavaan")){
-    tmp <- getPredictorsLavaan(model, dataset=data, covariates=predictors)
+  } else if (inherits(model, "lavaan")) {
+    tmp <-
+      getPredictorsLavaan(model, dataset = data, covariates = predictors)
     model.ids <- tmp[[1]]
     covariate.ids <- tmp[[2]]
   } else {
-    ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!");
+    ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!")
+    
   }
   
   # initial checks
   # Checks on x & y from the boruta package
-  if(length(grep('^shadow',covariate.ids)>0)) 
-    stop('Attributes with names starting from "shadow" are reserved for internal use. Please rename them.')
-  if(maxRuns<11)
+  if (length(grep('^shadow', covariate.ids) > 0))
+    stop(
+      'Attributes with names starting from "shadow" are reserved for internal use. Please rename them.'
+    )
+  if (maxRuns < 11)
     stop('maxRuns must be greater than 10.')
-  if(!pAdjMethod %in% stats::p.adjust.methods) 
-    stop(c('P-value adjustment method not found. Must be one of:', 
-           stats::p.adjust.methods))
+  if (!pAdjMethod %in% stats::p.adjust.methods)
+    stop(c(
+      'P-value adjustment method not found. Must be one of:',
+      stats::p.adjust.methods
+    ))
   
   # Might clash with some other semtrees stuff
-  if(is.null(predictors)) {
+  if (is.null(predictors)) {
     predictors <- names(data)[covariate.ids]
   }
   
   # Initialize and then loop over runs:
-  impHistory <- data.frame(matrix(NA, nrow=0, ncol=length(predictors)+3))
-  names(impHistory) <- c(predictors, "shadowMin", "shadowMean", "shadowMax")
-  decisionList <- data.frame(predictor=predictors, decision = "Tentative",
-                             hitCount = 0, raw.p=NA, adjusted.p=NA)
+  impHistory <-
+    data.frame(matrix(NA, nrow = 0, ncol = length(predictors) + 3))
+  names(impHistory) <-
+    c(predictors, "shadowMin", "shadowMean", "shadowMax")
+  decisionList <-
+    data.frame(
+      predictor = predictors,
+      decision = "Tentative",
+      hitCount = 0,
+      raw.p = NA,
+      adjusted.p = NA
+    )
   
   # TODO: Parallelize the first five runs.
-  for(runNo in 1:maxRuns) {
-    if(verbose) {
+  for (runNo in 1:maxRuns) {
+    if (verbose) {
       message(paste("Beginning Run", runNo))
     }
     
     # stage 1 - create shadow features
-    rejected <- decisionList$predictor[decisionList$decision == "Rejected"]
+    rejected <-
+      decisionList$predictor[decisionList$decision == "Rejected"]
     current.predictors <- setdiff(predictors, rejected)
-    current.covariate.ids <- setdiff(covariate.ids, names(data) %in% rejected)
+    current.covariate.ids <-
+      setdiff(covariate.ids, names(data) %in% rejected)
     current.data <- data[, setdiff(names(data), rejected)]
     
-    shadow.ids <- (ncol(current.data)+1):(ncol(current.data)+length(current.covariate.ids))
+    shadow.ids <-
+      (ncol(current.data) + 1):(ncol(current.data) + length(current.covariate.ids))
     
     for (cur_cov_id in current.covariate.ids) {
       # pick column and shuffle
       temp_column <- current.data[, cur_cov_id]
-      temp_column <- sample(temp_column, length(temp_column), replace=FALSE)
+      temp_column <-
+        sample(temp_column, length(temp_column), replace = FALSE)
       # add to dataset as shadow feature
-      temp_colname <- paste0("shadow_", names(current.data)[cur_cov_id],collapse="")
+      temp_colname <-
+        paste0("shadow_", names(current.data)[cur_cov_id], collapse = "")
       current.data[temp_colname] <- temp_column
-      if (!is.null(current.predictors)) current.predictors <- c(current.predictors, temp_colname)
+      if (!is.null(current.predictors))
+        current.predictors <- c(current.predictors, temp_colname)
     }
     
     # TODO: Pre-run model if needed.
     
     # run the forest
-    forest <- semforest(model, current.data, control, current.predictors, ...)
+    forest <-
+      semforest(model, current.data, control, current.predictors, ...)
     
     # run variable importance
     vim <- varimp(forest)
     
     # get variable importance from shadow features
     shadow_names <- names(current.data)[shadow.ids]
-    agvim <- aggregateVarimp(vim, aggregate="mean")
+    agvim <- aggregateVarimp(vim, aggregate = "mean")
     
     # Compute shadow stats
-    shadow_importances <- agvim[names(agvim)%in%shadow_names]
-    impHistory[runNo, "shadowMax"] <- max_shadow_importance <- max(shadow_importances)  
+    shadow_importances <- agvim[names(agvim) %in% shadow_names]
+    impHistory[runNo, "shadowMax"] <-
+      max_shadow_importance <- max(shadow_importances)
     impHistory[runNo, "shadowMin"] <- min(shadow_importances)
     impHistory[runNo, "shadowMean"] <- mean(shadow_importances)
-    agvim_filtered <- agvim[!(names(agvim)%in%shadow_names)]
+    agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)]
     impHistory[runNo, names(agvim_filtered)] <- agvim_filtered
     
-    # Compute "hits" 
-    hits <- decisionList$predictor %in% names(agvim_filtered[agvim_filtered>max_shadow_importance])
+    # Compute "hits"
+    hits <-
+      decisionList$predictor %in% names(agvim_filtered[agvim_filtered > max_shadow_importance])
     decisionList$hitCount[hits] <- decisionList$hitCount[hits] + 1
     
-    # Run tests. 
-    # The biasing here means that there are no decisions without correction 
+    # Run tests.
+    # The biasing here means that there are no decisions without correction
     #   before 5 runs and no decisions with Bonferroni before 7 runs.
     
     # Run confirmation tests (pulled from Boruta package)
-    newPs <- stats::pbinom(decisionList$hitCount-1,runNo,0.5,lower.tail=FALSE)
-    adjPs <- stats::p.adjust(newPs,method=pAdjMethod)
+    newPs <-
+      stats::pbinom(decisionList$hitCount - 1, runNo, 0.5, lower.tail = FALSE)
+    adjPs <- stats::p.adjust(newPs, method = pAdjMethod)
     acceptable <- adjPs < alpha
     updateList <- acceptable & decisionList$decision == "Tentative"
     decisionList$raw.p[updateList] <- newPs[updateList]
@@ -134,15 +159,16 @@ boruta <- function(model,
     decisionList$decision[updateList] <- "Confirmed"
     
     # Run rejection tests (pulled from Boruta package)
-    newPs <- stats::pbinom(decisionList$hitCount,runNo,0.5,lower.tail=TRUE)
-    adjPs <- stats::p.adjust(newPs,method=pAdjMethod)
+    newPs <-
+      stats::pbinom(decisionList$hitCount, runNo, 0.5, lower.tail = TRUE)
+    adjPs <- stats::p.adjust(newPs, method = pAdjMethod)
     acceptable <- adjPs < alpha
     updateList <- acceptable & decisionList$decision == "Tentative"
     decisionList$raw.p[updateList] <- newPs[updateList]
     decisionList$adjusted.p[updateList] <- adjPs[updateList]
     decisionList$decision[updateList] <- "Rejected"
     
-    if(!any(decisionList$decision == "Tentative")) {
+    if (!any(decisionList$decision == "Tentative")) {
       break
     }
     
@@ -153,7 +179,8 @@ boruta <- function(model,
   vim$decisions <- decisionList$decision
   vim$details <- decisionList
   
-  vim$filter <- decisionList$decision == "Confirmed"  # Turns into hitreg
+  vim$filter <-
+    decisionList$decision == "Confirmed"  # Turns into hitreg
   vim$boruta <- TRUE
   
   class(vim) <- "boruta"
@@ -163,33 +190,46 @@ boruta <- function(model,
   return(vim)
 }
 
+
 #' @exportS3Method plot boruta
-plot.boruta = function(vim, type=0, ...) {
+plot.boruta = function(vim, type = 0, ...) {
   decisionList = vim$details
   impHistory = vim$impHistory
   impHistory <- impHistory |>
-    dplyr::mutate(rnd=1:nrow(impHistory)) |>
-    tidyr::pivot_longer(cols = -last_col()) |> #everything()) |> 
-    dplyr::left_join(data.frame(decisionList), by=join_by("name"=="predictor")) |>
-    dplyr::mutate(decision = case_when(is.na(decision)~"Shadow", .default=decision)) |>
-    dplyr::group_by(name) |> dplyr::mutate(median_value = median(value,na.rm=TRUE))
- 
-  if (type==0) {
-   ggplot2::ggplot(impHistory, 
-                  aes(x=stats::reorder(name, median_value), 
-                      y=value, color=decision)) + 
-    ggplot2::geom_boxplot()+
-    ggplot2::xlab("")+
-    ggplot2::ylab("Importance")+
-    ggplot2::scale_color_discrete(name = "Decision")+
-    ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1))
-  } else if (type==1) {
+    dplyr::mutate(rnd = 1:nrow(impHistory)) |>
+    tidyr::pivot_longer(cols = -last_col()) |> #everything()) |>
+    dplyr::left_join(data.frame(decisionList),
+                     by = dplyr::join_by("name" == "predictor")) |>
+    dplyr::mutate(decision =
+                    case_when(is.na(decision) ~ "Shadow", .default = decision)) |>
+    dplyr::group_by(name) |>
+    dplyr::mutate(median_value = median(value, na.rm = TRUE))
+  
+  if (type == 0) {
+    ggplot2::ggplot(impHistory,
+                    ggplot2::aes(
+                      x = stats::reorder(name, median_value),
+                      y = value,
+                      color = decision
+                    )) +
+      ggplot2::geom_boxplot() +
+      ggplot2::xlab("") +
+      ggplot2::ylab("Importance") +
+      ggplot2::scale_color_discrete(name = "Decision") +
+      ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1))
+  } else if (type == 1) {
     ggplot2::ggplot(impHistory,
-                    ggplot2::aes(x=rnd, y=value,group=name,col=name))+
-      ggplot2::geom_line()+ 
-      ggplot2::geom_hline(aes(yintercept=median_value,col=name),lwd=2)+
-      ggplot2::xlab("Round")+
-      ggplot2::ylab("Importance")+
+                    ggplot2::aes(
+                      x = rnd,
+                      y = value,
+                      group = name,
+                      col = name
+                    )) +
+      ggplot2::geom_line() +
+      ggplot2::geom_hline(aes(yintercept = median_value, col = name), lwd =
+                            2) +
+      ggplot2::xlab("Round") +
+      ggplot2::ylab("Importance") +
       ggplot2::scale_color_discrete(name = "Predictor")
   } else {
     stop("Unknown graph type. Please choose 0 or 1.")

From b9d6ba430e5913df81c93cf136c6a0c5e85cbf21 Mon Sep 17 00:00:00 2001
From: brandmaier <brandmaier@mpib-berlin.mpg.de>
Date: Wed, 18 Sep 2024 15:09:46 +0200
Subject: [PATCH 15/16] added quantiles

---
 R/boruta.R | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/R/boruta.R b/R/boruta.R
index 066de45..a765f14 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -88,9 +88,13 @@ boruta <- function(model,
     )
   
   # TODO: Parallelize the first five runs.
+  end_time <- NULL
   for (runNo in 1:maxRuns) {
+    start_time <- Sys.time()
     if (verbose) {
-      message(paste("Beginning Run", runNo))
+      time_info <- ""
+      if (!is.null(end_time)) time_info <- paste0("Last run took", (end_time-start_time)) 
+        message(paste("Beginning Run", runNo," ",time_info))
     }
     
     # stage 1 - create shadow features
@@ -133,9 +137,9 @@ boruta <- function(model,
     # Compute shadow stats
     shadow_importances <- agvim[names(agvim) %in% shadow_names]
     impHistory[runNo, "shadowMax"] <-
-      max_shadow_importance <- max(shadow_importances)
-    impHistory[runNo, "shadowMin"] <- min(shadow_importances)
-    impHistory[runNo, "shadowMean"] <- mean(shadow_importances)
+      max_shadow_importance <- max(shadow_importances, na.rm=TRUE)
+    impHistory[runNo, "shadowMin"] <- min(shadow_importances, na.rm=TRUE)
+    impHistory[runNo, "shadowMean"] <- mean(shadow_importances, na.rm=TRUE)
     agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)]
     impHistory[runNo, names(agvim_filtered)] <- agvim_filtered
     
@@ -172,6 +176,8 @@ boruta <- function(model,
       break
     }
     
+    end_time <- Sys.time()
+    
   }
   
   vim$importance <- colMeans(impHistory, na.rm = TRUE)
@@ -201,14 +207,21 @@ plot.boruta = function(vim, type = 0, ...) {
     dplyr::left_join(data.frame(decisionList),
                      by = dplyr::join_by("name" == "predictor")) |>
     dplyr::mutate(decision =
-                    case_when(is.na(decision) ~ "Shadow", .default = decision)) |>
+                    dplyr::case_when(is.na(decision) ~ "Shadow", .default = decision)) |>
     dplyr::group_by(name) |>
     dplyr::mutate(median_value = median(value, na.rm = TRUE))
   
   if (type == 0) {
+    
+    # sort Inf values to the left
+    #impHistory[is.na(impHistory$median_value)] <- -Inf
+    impHistory$sort_value <- impHistory$median_value
+  #  mv <- min(impHistory$sort_value,na.rm=TRUE)
+   # impHistory$sort_value[impHistory$decision=="Rejected"]<- (mv-1)
+    
     ggplot2::ggplot(impHistory,
                     ggplot2::aes(
-                      x = stats::reorder(name, median_value),
+                      x = stats::reorder(name, sort_value),
                       y = value,
                       color = decision
                     )) +
@@ -216,7 +229,7 @@ plot.boruta = function(vim, type = 0, ...) {
       ggplot2::xlab("") +
       ggplot2::ylab("Importance") +
       ggplot2::scale_color_discrete(name = "Decision") +
-      ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1))
+      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
   } else if (type == 1) {
     ggplot2::ggplot(impHistory,
                     ggplot2::aes(
@@ -226,7 +239,7 @@ plot.boruta = function(vim, type = 0, ...) {
                       col = name
                     )) +
       ggplot2::geom_line() +
-      ggplot2::geom_hline(aes(yintercept = median_value, col = name), lwd =
+      ggplot2::geom_hline(ggplot2::aes(yintercept = median_value, col = name), lwd =
                             2) +
       ggplot2::xlab("Round") +
       ggplot2::ylab("Importance") +

From fd4116f2a22b7340e381211f00570fbcae5c7099 Mon Sep 17 00:00:00 2001
From: brandmaier <brandmaier@mpib-berlin.mpg.de>
Date: Mon, 30 Sep 2024 08:50:43 +0200
Subject: [PATCH 16/16] add quantile function

---
 R/boruta.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/boruta.R b/R/boruta.R
index a765f14..5831899 100644
--- a/R/boruta.R
+++ b/R/boruta.R
@@ -35,6 +35,7 @@ boruta <- function(model,
                    pAdjMethod = "none",
                    alpha = .05,
                    verbose = FALSE,
+                   quant = 1,
                    ...) {
   # detect model (warning: duplicated code)
   if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) {
@@ -136,8 +137,11 @@ boruta <- function(model,
     
     # Compute shadow stats
     shadow_importances <- agvim[names(agvim) %in% shadow_names]
-    impHistory[runNo, "shadowMax"] <-
-      max_shadow_importance <- max(shadow_importances, na.rm=TRUE)
+    impHistory[runNo, "shadowMax"] <- max(shadow_importances, na.rm=TRUE)
+    
+    max_shadow_importance <- stats::quantile(shadow_importances,
+                                             probs=quant,na.rm=TRUE)
+    
     impHistory[runNo, "shadowMin"] <- min(shadow_importances, na.rm=TRUE)
     impHistory[runNo, "shadowMean"] <- mean(shadow_importances, na.rm=TRUE)
     agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)]