From c14c555153fe4c8f694667582f50702f3166fe33 Mon Sep 17 00:00:00 2001 From: brandmaier Date: Mon, 30 Sep 2024 08:59:07 +0200 Subject: [PATCH] manual copy of BORUTA progress into master (because of mess with author names in commits) --- DESCRIPTION | 1 + NAMESPACE | 2 + R/boruta.R | 344 ++++++++++++++++++-------- R/toTable.R | 201 ++++++++------- man/boruta.Rd | 60 +++-- man/semtree.control.Rd | 4 +- tests/testthat/boruta_one_indicator.R | 26 ++ 7 files changed, 407 insertions(+), 231 deletions(-) create mode 100644 tests/testthat/boruta_one_indicator.R diff --git a/DESCRIPTION b/DESCRIPTION index 2c97d4f..5baa44c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,7 @@ Imports: cluster, ggplot2, tidyr, + dplyr, methods, strucchange, sandwich, diff --git a/NAMESPACE b/NAMESPACE index 0158412..54458c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,11 @@ S3method("[",semtree) S3method(evaluate,semforest) S3method(logLik,semtree) S3method(merge,semforest) +S3method(merge,semforest.varimp) S3method(nobs,semtree) S3method(partialDependence,semforest) S3method(partialDependence,semforest_stripped) +S3method(plot,boruta) S3method(plot,diversityMatrix) S3method(plot,partialDependence) S3method(plot,semforest.proximity) diff --git a/R/boruta.R b/R/boruta.R index f3cfcca..621060e 100644 --- a/R/boruta.R +++ b/R/boruta.R @@ -1,131 +1,255 @@ -#' BORUTA algorithm for SEM trees -#' -#' This is an experimental feature. Use cautiously. -#' -#' @aliases boruta -#' @param model A template model specification from \code{\link{OpenMx}} using -#' the \code{\link{mxModel}} function (or a \code{\link[lavaan]{lavaan}} model -#' using the \code{\link[lavaan]{lavaan}} function with option fit=FALSE). -#' Model must be syntactically correct within the framework chosen, and -#' converge to a solution. -#' @param data Data.frame used in the model creation using -#' \code{\link{mxModel}} or \code{\link[lavaan]{lavaan}} are input here. Order -#' of modeled variables and predictors is not important when providing a -#' dataset to \code{semtree}. -#' @param control \code{\link{semtree}} model specifications from -#' \code{\link{semtree.control}} are input here. Any changes from the default -#' setting can be specified here. -#' @param percentile_threshold Numeric. -#' @param rounds Numeric. Number of rounds of the BORUTA algorithm. +#' Run the Boruta algorithm on a sem tree #' +#' Grows a series of SEM Forests following the boruta algorithm to determine +#' feature importance as moderators of the underlying model. +#' +#' +#' @aliases boruta plot.boruta print.boruta +#' @param model A template SEM. Same as in \code{semtree}. +#' @param data A dataframe to boruta on. Same as in \code{semtree}. +#' @param control A semforest control object to set forest parameters. +#' @param predictors An optional list of covariates. See semtree code example. +#' @param maxRuns Maximum number of boruta search cycles +#' @param pAdjMethod A value from \link{stats::p.adjust.methods} defining a +#' multiple testing correction method +#' @param alpha p-value cutoff for decisionmaking. Default .05 +#' @param verbose Verbosity level for boruta processing +#' similar to the same argument in \link{semtree.control} and +#' \link{semforest.control} +#' @param \dots Optional parameters to undefined subfunctions +#' @return A vim object with several elements that need work. 
+#' Of particular note, `$importance` carries mean importance; +#' `$decision` denotes Accepted/Rejected/Tentative; +#' `$impHistory` has the entire varimp history; and +#' `$details` has exit values for each parameter. +#' @author Priyanka Paul, Timothy R. Brick, Andreas Brandmaier +#' @seealso \code{\link{semtree}} \code{\link{semforest}} +#' +#' @keywords tree models multivariate #' @export -#' boruta <- function(model, data, control = NULL, predictors = NULL, - percentile_threshold = 1, - rounds = 1, + maxRuns = 30, + pAdjMethod = "none", + alpha = .05, + verbose = FALSE, + quant = 1, ...) { + # detect model (warning: duplicated code) + if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { + tmp <- + getPredictorsOpenMx(mxmodel = model, + dataset = data, + covariates = predictors) + model.ids <- tmp[[1]] + covariate.ids <- tmp[[2]] + } else if (inherits(model, "lavaan")) { + tmp <- + getPredictorsLavaan(model, dataset = data, covariates = predictors) + model.ids <- tmp[[1]] + covariate.ids <- tmp[[2]] + } else { + ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") + + } # initial checks - stopifnot(percentile_threshold>=0) - stopifnot(percentile_threshold<=1) - stopifnot(is.numeric(rounds)) - stopifnot(rounds>0) + # Checks on x & y from the boruta package + if (length(grep('^shadow', covariate.ids) > 0)) + stop( + 'Attributes with names starting from "shadow" are reserved for internal use. Please rename them.' + ) + if (maxRuns < 11) + stop('maxRuns must be greater than 10.') + if (!pAdjMethod %in% stats::p.adjust.methods) + stop(c( + 'P-value adjustment method not found. Must be one of:', + stats::p.adjust.methods + )) - preds_important <- c() - preds_unimportant <- c() - - cur_round = 1 - temp_vims <- list() - - while(cur_round <= rounds) { - vim_boruta <- .boruta(model=model, - data=data, - control=control, - predictors=predictors, - percentile_threshold = percentile_threshold, - ...) - browser() - # add predictors to list of unimportant variables - preds_unimportant <- c(preds_unimportant, names(vim_boruta$filter)[!vim_boruta$filter]) - # remove them from the dataset - data <- data[, -c(preds_unimportant)] - temp_vims[[cur_round]] <-vim_boruta + # Might clash with some other semtrees stuff + if (is.null(predictors)) { + predictors <- names(data)[covariate.ids] } - result <- list( - preds_unimportant, - rounds = rounds - ) + # Initialize and then loop over runs: + impHistory <- + data.frame(matrix(NA, nrow = 0, ncol = length(predictors) + 3)) + names(impHistory) <- + c(predictors, "shadowMin", "shadowMean", "shadowMax") + decisionList <- + data.frame( + predictor = predictors, + decision = "Tentative", + hitCount = 0, + raw.p = NA, + adjusted.p = NA + ) - return(result) -} - -.boruta <- function(model, - data, - control = NULL, - predictors = NULL, - percentile_threshold = 1, - num_shadows = 1, - ...) { - - # make sure that no column names start with "shadow_" prefix - stopifnot(all(sapply(names(data), function(x) {!startsWith(x, "shadow_")}))) - - # detect model (warning: duplicated code) - if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { - tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors) - - } else if (inherits(model,"lavaan")){ + # TODO: Parallelize the first five runs. 
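+  # Each run below: (1) drops predictors already Rejected, (2) appends a
+  # permuted "shadow_" copy of every remaining covariate, (3) grows a SEM
+  # forest and aggregates variable importance, and (4) counts a "hit" for
+  # every real predictor whose mean importance exceeds the 'quant' quantile
+  # of the shadow importances. Binomial tests on the hit counts then move
+  # predictors from "Tentative" to "Confirmed" or "Rejected".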
+ end_time <- NULL + for (runNo in 1:maxRuns) { + start_time <- Sys.time() + if (verbose) { + time_info <- "" + if (!is.null(end_time)) time_info <- paste0("Last run took", (end_time-start_time)) + message(paste("Beginning Run", runNo," ",time_info)) + } - tmp <- getPredictorsLavaan(model, data, predictors) - } else { - ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") - } - model.ids <- tmp[[1]] - covariate.ids <- tmp[[2]] - - # stage 1 - create shadow features - - shadow.ids <- (ncol(data) + 1):(ncol(data) + length(covariate.ids)) - - for (cur_cov_id in covariate.ids) { - for (rep_id in 1:num_shadows) { - # pick column and shuffle - temp_column <- data[, cur_cov_id] - temp_column <- sample(temp_column, length(temp_column), replace = FALSE) - # add to dataset as shadow feature - temp_colname <- paste0("shadow_", names(data)[cur_cov_id], collapse = "") - if (num_shadows>1) temp_colname <- paste0(temp_colname, rep_id, collapse = "") - data[temp_colname] <- temp_column - if (!is.null(predictors)) predictors <- c(predictors, temp_colname) + # stage 1 - create shadow features + rejected <- + decisionList$predictor[decisionList$decision == "Rejected"] + current.predictors <- setdiff(predictors, rejected) + current.covariate.ids <- + setdiff(covariate.ids, names(data) %in% rejected) + current.data <- data[, setdiff(names(data), rejected)] + + shadow.ids <- + (ncol(current.data) + 1):(ncol(current.data) + length(current.covariate.ids)) + + for (cur_cov_id in current.covariate.ids) { + # pick column and shuffle + temp_column <- current.data[, cur_cov_id] + temp_column <- + sample(temp_column, length(temp_column), replace = FALSE) + # add to dataset as shadow feature + temp_colname <- + paste0("shadow_", names(current.data)[cur_cov_id], collapse = "") + current.data[temp_colname] <- temp_column + if (!is.null(current.predictors)) + current.predictors <- c(current.predictors, temp_colname) + } + + # TODO: Pre-run model if needed. + + # run the forest + forest <- + semforest(model, current.data, control, current.predictors, ...) + + # run variable importance + vim <- varimp(forest) + + # get variable importance from shadow features + shadow_names <- names(current.data)[shadow.ids] + agvim <- aggregateVarimp(vim, aggregate = "mean") + + # Compute shadow stats + shadow_importances <- agvim[names(agvim) %in% shadow_names] + impHistory[runNo, "shadowMax"] <- max(shadow_importances, na.rm=TRUE) + + max_shadow_importance <- stats::quantile(shadow_importances, + probs=quant,na.rm=TRUE) + + impHistory[runNo, "shadowMin"] <- min(shadow_importances, na.rm=TRUE) + impHistory[runNo, "shadowMean"] <- mean(shadow_importances, na.rm=TRUE) + agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)] + impHistory[runNo, names(agvim_filtered)] <- agvim_filtered + + # Compute "hits" + hits <- + decisionList$predictor %in% names(agvim_filtered[agvim_filtered > max_shadow_importance]) + decisionList$hitCount[hits] <- decisionList$hitCount[hits] + 1 + + # Run tests. + # The biasing here means that there are no decisions without correction + # before 5 runs and no decisions with Bonferroni before 7 runs. 
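+    # For example, with pAdjMethod = "none" a predictor that beats the shadow
+    # threshold in every run has p = 0.5^runNo, which first drops below
+    # alpha = .05 at runNo = 5 (0.5^5 = 0.03125). Under a Bonferroni
+    # correction across, say, four or more predictors, at least seven runs
+    # are needed, since 0.5^6 = 0.015625 is not below .05/4 = 0.0125.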
+ + # Run confirmation tests (pulled from Boruta package) + newPs <- + stats::pbinom(decisionList$hitCount - 1, runNo, 0.5, lower.tail = FALSE) + adjPs <- stats::p.adjust(newPs, method = pAdjMethod) + acceptable <- adjPs < alpha + updateList <- acceptable & decisionList$decision == "Tentative" + decisionList$raw.p[updateList] <- newPs[updateList] + decisionList$adjusted.p[updateList] <- adjPs[updateList] + decisionList$decision[updateList] <- "Confirmed" + + # Run rejection tests (pulled from Boruta package) + newPs <- + stats::pbinom(decisionList$hitCount, runNo, 0.5, lower.tail = TRUE) + adjPs <- stats::p.adjust(newPs, method = pAdjMethod) + acceptable <- adjPs < alpha + updateList <- acceptable & decisionList$decision == "Tentative" + decisionList$raw.p[updateList] <- newPs[updateList] + decisionList$adjusted.p[updateList] <- adjPs[updateList] + decisionList$decision[updateList] <- "Rejected" + + if (!any(decisionList$decision == "Tentative")) { + break } + + end_time <- Sys.time() + } - - # run the forest - forest <- semforest(model, data, control, predictors, ...) - - # run variable importance - vim <- varimp(forest) - - # get variable importance from shadow features - shadow_names <- names(data)[shadow.ids] - agvim <- aggregateVarimp(vim, aggregate = "mean") - vals <- agvim[names(agvim) %in% shadow_names] - #max_shadow_importance <- max(vals) - max_shadow_importance <- quantile(vals, percentile_threshold) + vim$importance <- colMeans(impHistory, na.rm = TRUE) + vim$impHistory <- impHistory + vim$decisions <- decisionList$decision + vim$details <- decisionList - agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)] - - df <- data.frame(importance = agvim_filtered, predictor = names(agvim_filtered)) - - vim$filter <- agvim_filtered > max_shadow_importance + vim$filter <- + decisionList$decision == "Confirmed" # Turns into hitreg vim$boruta <- TRUE - vim$boruta_threshold <- max_shadow_importance - vim$percentile_threshold <- percentile_threshold - + + class(vim) <- "boruta" + + # TODO: Loop ends here with some reporting. + return(vim) } + + +#' @exportS3Method plot boruta +plot.boruta = function(vim, type = 0, ...) 
{ + decisionList = vim$details + impHistory = vim$impHistory + impHistory <- impHistory |> + dplyr::mutate(rnd = 1:nrow(impHistory)) |> + tidyr::pivot_longer(cols = -last_col()) |> #everything()) |> + dplyr::left_join(data.frame(decisionList), + by = dplyr::join_by("name" == "predictor")) |> + dplyr::mutate(decision = + dplyr::case_when(is.na(decision) ~ "Shadow", .default = decision)) |> + dplyr::group_by(name) |> + dplyr::mutate(median_value = median(value, na.rm = TRUE)) + + if (type == 0) { + + # sort Inf values to the left + #impHistory[is.na(impHistory$median_value)] <- -Inf + impHistory$sort_value <- impHistory$median_value + # mv <- min(impHistory$sort_value,na.rm=TRUE) + # impHistory$sort_value[impHistory$decision=="Rejected"]<- (mv-1) + + ggplot2::ggplot(impHistory, + ggplot2::aes( + x = stats::reorder(name, sort_value), + y = value, + color = decision + )) + + ggplot2::geom_boxplot() + + ggplot2::xlab("") + + ggplot2::ylab("Importance") + + ggplot2::scale_color_discrete(name = "Decision") + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + } else if (type == 1) { + ggplot2::ggplot(impHistory, + ggplot2::aes( + x = rnd, + y = value, + group = name, + col = name + )) + + ggplot2::geom_line() + + ggplot2::geom_hline(ggplot2::aes(yintercept = median_value, col = name), lwd = + 2) + + ggplot2::xlab("Round") + + ggplot2::ylab("Importance") + + ggplot2::scale_color_discrete(name = "Predictor") + } else { + stop("Unknown graph type. Please choose 0 or 1.") + } + +} \ No newline at end of file diff --git a/R/toTable.R b/R/toTable.R index 337c235..f7a96ac 100644 --- a/R/toTable.R +++ b/R/toTable.R @@ -48,116 +48,115 @@ treeToTable <- function(tree, colDataList=list(), result=list()) #' Psychology}. #' @export toTable <- function(tree, added.param.cols=NULL, round.param=NULL) { - - -# collect all data -alls <- c() -rowdata <- treeToTable(tree) -for (i in 1:length(rowdata)) { - myrow <- rowdata[[i]][[1]] - for (j in 1:length(myrow)) { - myitem <- myrow[[j]] - alls[length(alls)+1] <- myitem[[1]]$name + + + # collect all data + alls <- c() + rowdata <- treeToTable(tree) + for (i in 1:length(rowdata)) { + myrow <- rowdata[[i]][[1]] + for (j in 1:length(myrow)) { + myitem <- myrow[[j]] + alls[length(alls)+1] <- myitem[[1]]$name + } } -} -alls <- unique(alls) -# collect all variables - - -# create table -#covariate.names <-simplify2array(tree$result$btn.matrix[2,]) -covariate.names <- getCovariatesFromTree(tree) - -# default is to display all parameters -if (is.null(added.param.cols)) { - added.param.cols <- names(tree$params) -} - -# all column names for the table to be generated (covariate names and parameter names) -all.names <- c(covariate.names, added.param.cols) - -str.matrix <- matrix(NA, nrow = length(rowdata),ncol=length(all.names)) - -colnames(str.matrix) <- all.names - -for (i in 1:length(rowdata)) { -# result.row <- rep(" ",length(covariate.names)) - myrow <- rowdata[[i]][[1]] - for (j in 1:length(myrow)) { - myitem <- myrow[[j]] - varid <- which(covariate.names==myitem[[1]]$name) - state <- myitem[[2]] - if (state==1) { # state encodes whether TRUE OR FALSE - if (myitem[[1]]$relation==">=") { - rule <- paste(">=",saferound(myitem[[1]]$value,2)) - } else if (myitem[[1]]$relation=="%in%") { - rule <- paste(myitem[[1]]$value, collapse=" or ") + alls <- unique(alls) + # collect all variables + + + # create table + covariate.names <- getCovariatesFromTree(tree) + + # default is to display all parameters + if (is.null(added.param.cols)) { + added.param.cols <- 
names(tree$params) + } + + # all column names for the table to be generated (covariate names and parameter names) + all.names <- c(covariate.names, added.param.cols) + + str.matrix <- matrix(NA, nrow = length(rowdata),ncol=length(all.names)) + + # convert to a data frame to avoid coercion to string + str.matrix <- data.frame(str.matrix) + + colnames(str.matrix) <- all.names + + for (i in 1:length(rowdata)) { + myrow <- rowdata[[i]][[1]] + for (j in 1:length(myrow)) { + myitem <- myrow[[j]] + varid <- which(covariate.names==myitem[[1]]$name) + state <- myitem[[2]] + if (state==1) { # state encodes whether TRUE OR FALSE + if (myitem[[1]]$relation==">=") { + rule <- paste(">=",saferound(myitem[[1]]$value,2)) + } else if (myitem[[1]]$relation=="%in%") { + rule <- paste(myitem[[1]]$value, collapse=" or ") + } else { + rule <- "UNKNOWN" + } } else { - rule <- "UNKNOWN" - } - } else { - - # invert rule - rule <- "" - if (myitem[[1]]$relation==">=") { - rule <- paste("<",saferound(myitem[[1]]$value,2)) - } else if (myitem[[1]]$relation=="%in%") { - rule <- paste("not (",paste(myitem[[1]]$value,collapse=" or "),")") - } else + + # invert rule + rule <- "" + if (myitem[[1]]$relation==">=") { + rule <- paste("<",saferound(myitem[[1]]$value,2)) + } else if (myitem[[1]]$relation=="%in%") { + rule <- paste("not (",paste(myitem[[1]]$value,collapse=" or "),")") + } else { rule <- "UNKNOWN" } + } + if (is.na(str.matrix[i, varid])) { + str.matrix[i,varid] <- rule + } else { + str.matrix[i,varid] <- paste(str.matrix[i,varid]," & ", rule) + } } - if (is.na(str.matrix[i, varid])) { - str.matrix[i,varid] <- rule - } else { - str.matrix[i,varid] <- paste(str.matrix[i,varid]," & ", rule) + # fill variable names + if (!is.null(added.param.cols)) { + for (j in 1:length(added.param.cols)) { + colid <- which(colnames(str.matrix)==added.param.cols[j]) + param <- rowdata[[i]][[2]][which(names(rowdata[[i]][[2]])==added.param.cols[j])] + if (!is.null(round.param)) { + param <- saferound(param, round.param) + } + str.matrix[i,colid] <- param + } } + } - # fill variable names - if (!is.null(added.param.cols)) { - for (j in 1:length(added.param.cols)) { - colid <- which(colnames(str.matrix)==added.param.cols[j]) - param <- rowdata[[i]][[2]][which(names(rowdata[[i]][[2]])==added.param.cols[j])] - if (!is.null(round.param)) { - param <- saferound(param, round.param) - } - str.matrix[i,colid] <- param + + ## prune empty columns? + is.col.empty <- which(apply(str.matrix,2,function(x){all(is.na(x))})) + if (length(is.col.empty)>0) { + str.matrix <- str.matrix[,-is.col.empty] } + + # sort columns according to number of elements + sortby <- apply(str.matrix,2,function(x){sum(!is.na(x))}) + if (!is.null(added.param.cols)) { + remids <- (dim(str.matrix)[2]-length(added.param.cols)+1):(dim(str.matrix)[2]) + sortby[remids] <- sortby[remids]-999999 # bad style ^^ #TODO } + sort.ix <- sort(sortby,index.return=TRUE,decreasing = TRUE)$ix + str.matrix <- str.matrix[, sort.ix] + + + str.matrix[is.na(str.matrix)]<-"" + + ## format output + #result.string <- paste(paste(colnames(str.matrix),collapse="\t"),"\n") + #for (i in 1:dim(str.matrix)[1]) { + # result.string <- paste(result.string,paste(str.matrix[i,],collapse="\t"),"\n") + #} + + ## and display + #cat(result.string) + + + return(str.matrix) -# result.string <- paste(result.string,paste(result.row,collapse="\t"),"\n") -} - -## prune empty columns? 
-is.col.empty <- which(apply(str.matrix,2,function(x){all(is.na(x))})) -if (length(is.col.empty)>0) { - str.matrix <- str.matrix[,-is.col.empty] -} - -# sort columns according to number of elements -sortby <- apply(str.matrix,2,function(x){sum(!is.na(x))}) -if (!is.null(added.param.cols)) { - remids <- (dim(str.matrix)[2]-length(added.param.cols)+1):(dim(str.matrix)[2]) - sortby[remids] <- sortby[remids]-999999 -} -sort.ix <- sort(sortby,index.return=TRUE,decreasing = TRUE)$ix -str.matrix <- str.matrix[, sort.ix] - - -str.matrix[is.na(str.matrix)]<-"" - -## format output -#result.string <- paste(paste(colnames(str.matrix),collapse="\t"),"\n") -#for (i in 1:dim(str.matrix)[1]) { -# result.string <- paste(result.string,paste(str.matrix[i,],collapse="\t"),"\n") -#} - -## and display -#cat(result.string) - -#require("openxlsx") - -return(str.matrix) - } diff --git a/man/boruta.Rd b/man/boruta.Rd index f0e8b85..8c1fc76 100644 --- a/man/boruta.Rd +++ b/man/boruta.Rd @@ -2,38 +2,62 @@ % Please edit documentation in R/boruta.R \name{boruta} \alias{boruta} -\title{BORUTA algorithm for SEM trees} +\alias{plot.boruta} +\alias{print.boruta} +\title{Run the Boruta algorithm on a sem tree} \usage{ boruta( model, data, control = NULL, predictors = NULL, - percentile_threshold = 100, - rounds = 1, + maxRuns = 30, + pAdjMethod = "none", + alpha = 0.05, + verbose = FALSE, + quant = 1, ... ) } \arguments{ -\item{model}{A template model specification from \code{\link{OpenMx}} using -the \code{\link{mxModel}} function (or a \code{\link[lavaan]{lavaan}} model -using the \code{\link[lavaan]{lavaan}} function with option fit=FALSE). -Model must be syntactically correct within the framework chosen, and -converge to a solution.} +\item{model}{A template SEM. Same as in \code{semtree}.} -\item{data}{Data.frame used in the model creation using -\code{\link{mxModel}} or \code{\link[lavaan]{lavaan}} are input here. Order -of modeled variables and predictors is not important when providing a -dataset to \code{semtree}.} +\item{data}{A dataframe to boruta on. Same as in \code{semtree}.} -\item{control}{\code{\link{semtree}} model specifications from -\code{\link{semtree.control}} are input here. Any changes from the default -setting can be specified here.} +\item{control}{A semforest control object to set forest parameters.} -\item{percentile_threshold}{Numeric.} +\item{predictors}{An optional list of covariates. See semtree code example.} -\item{rounds}{Numeric. Number of rounds of the BORUTA algorithm.} +\item{maxRuns}{Maximum number of boruta search cycles} + +\item{pAdjMethod}{A value from \link{stats::p.adjust.methods} defining a +multiple testing correction method} + +\item{alpha}{p-value cutoff for decisionmaking. Default .05} + +\item{verbose}{Verbosity level for boruta processing +similar to the same argument in \link{semtree.control} and +\link{semforest.control}} + +\item{\dots}{Optional parameters to undefined subfunctions} +} +\value{ +A vim object with several elements that need work. + Of particular note, `$importance` carries mean importance; + `$decision` denotes Accepted/Rejected/Tentative; + `$impHistory` has the entire varimp history; and + `$details` has exit values for each parameter. } \description{ -BORUTA algorithm for SEM trees +Grows a series of SEM Forests following the boruta algorithm to determine + feature importance as moderators of the underlying model. +} +\seealso{ +\code{\link{semtree}} \code{\link{semforest}} +} +\author{ +Priyanka Paul, Timothy R. 
Brick, Andreas Brandmaier
 }
+\keyword{models}
+\keyword{multivariate}
+\keyword{tree}
diff --git a/man/semtree.control.Rd b/man/semtree.control.Rd
index 7ce8847..df9d886 100644
--- a/man/semtree.control.Rd
+++ b/man/semtree.control.Rd
@@ -8,7 +8,7 @@
 \usage{
 semtree.control(
   method = c("naive", "score", "fair", "fair3"),
-  min.N = 20,
+  min.N = NULL,
   max.depth = NA,
   alpha = 0.05,
   alpha.invariance = NA,
@@ -24,7 +24,7 @@ semtree.control(
   report.level = 0,
   exclude.code = NA,
   linear = TRUE,
-  min.bucket = 10,
+  min.bucket = NULL,
   naive.bonferroni.type = 0,
   missing = "ignore",
   use.maxlm = FALSE,
diff --git a/tests/testthat/boruta_one_indicator.R b/tests/testthat/boruta_one_indicator.R
new file mode 100644
index 0000000..cb1e632
--- /dev/null
+++ b/tests/testthat/boruta_one_indicator.R
@@ -0,0 +1,28 @@
+influence <- c(1, 0)
+nPred <- length(influence)
+cutbreaks <- 3
+make_categorical <- TRUE
+# sample size for mxGenerateData (arbitrary choice for this test script)
+N <- 200
+
+
+genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
+                    mxPath(paste0("X", 1:nPred), "Y", values=influence),
+                    mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
+simpleData <- mxGenerateData(genModel, N)
+if(make_categorical) {
+  for(i in paste0("X", 1:nPred)) {
+    simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+    simpleData[[i]] <- mxFactor(simpleData[[i]], levels(simpleData[[i]]))
+  }
+}
+
+testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y",
+                     mxPath("Y", arrows=2, values=1, free=TRUE, labels=c("Var")),
+                     mxPath("one", "Y", values=0, free=TRUE, labels=c("mu")),
+                     mxData(simpleData, type="raw"))
+
+control <- semforest.control(control=semtree.control(method="score", alpha=1))
+
+output <- boruta(testModel, simpleData, verbose=TRUE, control = control)
+output
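+
+# Optional, informal follow-up (not part of the automated checks): inspect the
+# per-predictor decisions and plot the importance history via the plot.boruta
+# method introduced in this patch (type 0 = boxplots, type 1 = per-run traces).
+print(output$details)
+plot(output, type = 0)
+plot(output, type = 1)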