From c14c555153fe4c8f694667582f50702f3166fe33 Mon Sep 17 00:00:00 2001 From: brandmaier Date: Mon, 30 Sep 2024 08:59:07 +0200 Subject: [PATCH] manual copy of BORUTA progress into master (because of mess with author names in commits) --- DESCRIPTION | 1 + NAMESPACE | 2 + R/boruta.R | 344 ++++++++++++++++++-------- R/toTable.R | 201 ++++++++------- man/boruta.Rd | 60 +++-- man/semtree.control.Rd | 4 +- tests/testthat/boruta_one_indicator.R | 26 ++ 7 files changed, 407 insertions(+), 231 deletions(-) create mode 100644 tests/testthat/boruta_one_indicator.R diff --git a/DESCRIPTION b/DESCRIPTION index 2c97d4f..5baa44c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,7 @@ Imports: cluster, ggplot2, tidyr, + dplyr, methods, strucchange, sandwich, diff --git a/NAMESPACE b/NAMESPACE index 0158412..54458c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,11 @@ S3method("[",semtree) S3method(evaluate,semforest) S3method(logLik,semtree) S3method(merge,semforest) +S3method(merge,semforest.varimp) S3method(nobs,semtree) S3method(partialDependence,semforest) S3method(partialDependence,semforest_stripped) +S3method(plot,boruta) S3method(plot,diversityMatrix) S3method(plot,partialDependence) S3method(plot,semforest.proximity) diff --git a/R/boruta.R b/R/boruta.R index f3cfcca..621060e 100644 --- a/R/boruta.R +++ b/R/boruta.R @@ -1,131 +1,255 @@ -#' BORUTA algorithm for SEM trees -#' -#' This is an experimental feature. Use cautiously. -#' -#' @aliases boruta -#' @param model A template model specification from \code{\link{OpenMx}} using -#' the \code{\link{mxModel}} function (or a \code{\link[lavaan]{lavaan}} model -#' using the \code{\link[lavaan]{lavaan}} function with option fit=FALSE). -#' Model must be syntactically correct within the framework chosen, and -#' converge to a solution. -#' @param data Data.frame used in the model creation using -#' \code{\link{mxModel}} or \code{\link[lavaan]{lavaan}} are input here. Order -#' of modeled variables and predictors is not important when providing a -#' dataset to \code{semtree}. -#' @param control \code{\link{semtree}} model specifications from -#' \code{\link{semtree.control}} are input here. Any changes from the default -#' setting can be specified here. -#' @param percentile_threshold Numeric. -#' @param rounds Numeric. Number of rounds of the BORUTA algorithm. +#' Run the Boruta algorithm on a sem tree #' +#' Grows a series of SEM Forests following the boruta algorithm to determine +#' feature importance as moderators of the underlying model. +#' +#' +#' @aliases boruta plot.boruta print.boruta +#' @param model A template SEM. Same as in \code{semtree}. +#' @param data A dataframe to boruta on. Same as in \code{semtree}. +#' @param control A semforest control object to set forest parameters. +#' @param predictors An optional list of covariates. See semtree code example. +#' @param maxRuns Maximum number of boruta search cycles +#' @param pAdjMethod A value from \link{stats::p.adjust.methods} defining a +#' multiple testing correction method +#' @param alpha p-value cutoff for decisionmaking. Default .05 +#' @param verbose Verbosity level for boruta processing +#' similar to the same argument in \link{semtree.control} and +#' \link{semforest.control} +#' @param \dots Optional parameters to undefined subfunctions +#' @return A vim object with several elements that need work. 
+#' Of particular note, `$importance` carries mean importance; +#' `$decision` denotes Accepted/Rejected/Tentative; +#' `$impHistory` has the entire varimp history; and +#' `$details` has exit values for each parameter. +#' @author Priyanka Paul, Timothy R. Brick, Andreas Brandmaier +#' @seealso \code{\link{semtree}} \code{\link{semforest}} +#' +#' @keywords tree models multivariate #' @export -#' boruta <- function(model, data, control = NULL, predictors = NULL, - percentile_threshold = 1, - rounds = 1, + maxRuns = 30, + pAdjMethod = "none", + alpha = .05, + verbose = FALSE, + quant = 1, ...) { + # detect model (warning: duplicated code) + if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { + tmp <- + getPredictorsOpenMx(mxmodel = model, + dataset = data, + covariates = predictors) + model.ids <- tmp[[1]] + covariate.ids <- tmp[[2]] + } else if (inherits(model, "lavaan")) { + tmp <- + getPredictorsLavaan(model, dataset = data, covariates = predictors) + model.ids <- tmp[[1]] + covariate.ids <- tmp[[2]] + } else { + ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") + + } # initial checks - stopifnot(percentile_threshold>=0) - stopifnot(percentile_threshold<=1) - stopifnot(is.numeric(rounds)) - stopifnot(rounds>0) + # Checks on x & y from the boruta package + if (length(grep('^shadow', covariate.ids) > 0)) + stop( + 'Attributes with names starting from "shadow" are reserved for internal use. Please rename them.' + ) + if (maxRuns < 11) + stop('maxRuns must be greater than 10.') + if (!pAdjMethod %in% stats::p.adjust.methods) + stop(c( + 'P-value adjustment method not found. Must be one of:', + stats::p.adjust.methods + )) - preds_important <- c() - preds_unimportant <- c() - - cur_round = 1 - temp_vims <- list() - - while(cur_round <= rounds) { - vim_boruta <- .boruta(model=model, - data=data, - control=control, - predictors=predictors, - percentile_threshold = percentile_threshold, - ...) - browser() - # add predictors to list of unimportant variables - preds_unimportant <- c(preds_unimportant, names(vim_boruta$filter)[!vim_boruta$filter]) - # remove them from the dataset - data <- data[, -c(preds_unimportant)] - temp_vims[[cur_round]] <-vim_boruta + # Might clash with some other semtrees stuff + if (is.null(predictors)) { + predictors <- names(data)[covariate.ids] } - result <- list( - preds_unimportant, - rounds = rounds - ) + # Initialize and then loop over runs: + impHistory <- + data.frame(matrix(NA, nrow = 0, ncol = length(predictors) + 3)) + names(impHistory) <- + c(predictors, "shadowMin", "shadowMean", "shadowMax") + decisionList <- + data.frame( + predictor = predictors, + decision = "Tentative", + hitCount = 0, + raw.p = NA, + adjusted.p = NA + ) - return(result) -} - -.boruta <- function(model, - data, - control = NULL, - predictors = NULL, - percentile_threshold = 1, - num_shadows = 1, - ...) { - - # make sure that no column names start with "shadow_" prefix - stopifnot(all(sapply(names(data), function(x) {!startsWith(x, "shadow_")}))) - - # detect model (warning: duplicated code) - if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { - tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors) - - } else if (inherits(model,"lavaan")){ + # TODO: Parallelize the first five runs. 
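+  # Each run below: (1) drops predictors already Rejected, (2) appends a
+  # permuted "shadow_" copy of every remaining covariate, (3) grows a SEM
+  # forest and aggregates variable importance, and (4) counts a "hit" for
+  # every real predictor whose mean importance exceeds the 'quant' quantile
+  # of the shadow importances. Binomial tests on the hit counts then move
+  # predictors from "Tentative" to "Confirmed" or "Rejected".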
+ end_time <- NULL + for (runNo in 1:maxRuns) { + start_time <- Sys.time() + if (verbose) { + time_info <- "" + if (!is.null(end_time)) time_info <- paste0("Last run took", (end_time-start_time)) + message(paste("Beginning Run", runNo," ",time_info)) + } - tmp <- getPredictorsLavaan(model, data, predictors) - } else { - ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") - } - model.ids <- tmp[[1]] - covariate.ids <- tmp[[2]] - - # stage 1 - create shadow features - - shadow.ids <- (ncol(data) + 1):(ncol(data) + length(covariate.ids)) - - for (cur_cov_id in covariate.ids) { - for (rep_id in 1:num_shadows) { - # pick column and shuffle - temp_column <- data[, cur_cov_id] - temp_column <- sample(temp_column, length(temp_column), replace = FALSE) - # add to dataset as shadow feature - temp_colname <- paste0("shadow_", names(data)[cur_cov_id], collapse = "") - if (num_shadows>1) temp_colname <- paste0(temp_colname, rep_id, collapse = "") - data[temp_colname] <- temp_column - if (!is.null(predictors)) predictors <- c(predictors, temp_colname) + # stage 1 - create shadow features + rejected <- + decisionList$predictor[decisionList$decision == "Rejected"] + current.predictors <- setdiff(predictors, rejected) + current.covariate.ids <- + setdiff(covariate.ids, names(data) %in% rejected) + current.data <- data[, setdiff(names(data), rejected)] + + shadow.ids <- + (ncol(current.data) + 1):(ncol(current.data) + length(current.covariate.ids)) + + for (cur_cov_id in current.covariate.ids) { + # pick column and shuffle + temp_column <- current.data[, cur_cov_id] + temp_column <- + sample(temp_column, length(temp_column), replace = FALSE) + # add to dataset as shadow feature + temp_colname <- + paste0("shadow_", names(current.data)[cur_cov_id], collapse = "") + current.data[temp_colname] <- temp_column + if (!is.null(current.predictors)) + current.predictors <- c(current.predictors, temp_colname) + } + + # TODO: Pre-run model if needed. + + # run the forest + forest <- + semforest(model, current.data, control, current.predictors, ...) + + # run variable importance + vim <- varimp(forest) + + # get variable importance from shadow features + shadow_names <- names(current.data)[shadow.ids] + agvim <- aggregateVarimp(vim, aggregate = "mean") + + # Compute shadow stats + shadow_importances <- agvim[names(agvim) %in% shadow_names] + impHistory[runNo, "shadowMax"] <- max(shadow_importances, na.rm=TRUE) + + max_shadow_importance <- stats::quantile(shadow_importances, + probs=quant,na.rm=TRUE) + + impHistory[runNo, "shadowMin"] <- min(shadow_importances, na.rm=TRUE) + impHistory[runNo, "shadowMean"] <- mean(shadow_importances, na.rm=TRUE) + agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)] + impHistory[runNo, names(agvim_filtered)] <- agvim_filtered + + # Compute "hits" + hits <- + decisionList$predictor %in% names(agvim_filtered[agvim_filtered > max_shadow_importance]) + decisionList$hitCount[hits] <- decisionList$hitCount[hits] + 1 + + # Run tests. + # The biasing here means that there are no decisions without correction + # before 5 runs and no decisions with Bonferroni before 7 runs. 
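+    # For example, with pAdjMethod = "none" a predictor that beats the shadow
+    # threshold in every run has p = 0.5^runNo, which first drops below
+    # alpha = .05 at runNo = 5 (0.5^5 = 0.03125). Under a Bonferroni
+    # correction across, say, four or more predictors, at least seven runs
+    # are needed, since 0.5^6 = 0.015625 is not below .05/4 = 0.0125.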
+ + # Run confirmation tests (pulled from Boruta package) + newPs <- + stats::pbinom(decisionList$hitCount - 1, runNo, 0.5, lower.tail = FALSE) + adjPs <- stats::p.adjust(newPs, method = pAdjMethod) + acceptable <- adjPs < alpha + updateList <- acceptable & decisionList$decision == "Tentative" + decisionList$raw.p[updateList] <- newPs[updateList] + decisionList$adjusted.p[updateList] <- adjPs[updateList] + decisionList$decision[updateList] <- "Confirmed" + + # Run rejection tests (pulled from Boruta package) + newPs <- + stats::pbinom(decisionList$hitCount, runNo, 0.5, lower.tail = TRUE) + adjPs <- stats::p.adjust(newPs, method = pAdjMethod) + acceptable <- adjPs < alpha + updateList <- acceptable & decisionList$decision == "Tentative" + decisionList$raw.p[updateList] <- newPs[updateList] + decisionList$adjusted.p[updateList] <- adjPs[updateList] + decisionList$decision[updateList] <- "Rejected" + + if (!any(decisionList$decision == "Tentative")) { + break } + + end_time <- Sys.time() + } - - # run the forest - forest <- semforest(model, data, control, predictors, ...) - - # run variable importance - vim <- varimp(forest) - - # get variable importance from shadow features - shadow_names <- names(data)[shadow.ids] - agvim <- aggregateVarimp(vim, aggregate = "mean") - vals <- agvim[names(agvim) %in% shadow_names] - #max_shadow_importance <- max(vals) - max_shadow_importance <- quantile(vals, percentile_threshold) + vim$importance <- colMeans(impHistory, na.rm = TRUE) + vim$impHistory <- impHistory + vim$decisions <- decisionList$decision + vim$details <- decisionList - agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)] - - df <- data.frame(importance = agvim_filtered, predictor = names(agvim_filtered)) - - vim$filter <- agvim_filtered > max_shadow_importance + vim$filter <- + decisionList$decision == "Confirmed" # Turns into hitreg vim$boruta <- TRUE - vim$boruta_threshold <- max_shadow_importance - vim$percentile_threshold <- percentile_threshold - + + class(vim) <- "boruta" + + # TODO: Loop ends here with some reporting. + return(vim) } + + +#' @exportS3Method plot boruta +plot.boruta = function(vim, type = 0, ...) 
{ + decisionList = vim$details + impHistory = vim$impHistory + impHistory <- impHistory |> + dplyr::mutate(rnd = 1:nrow(impHistory)) |> + tidyr::pivot_longer(cols = -last_col()) |> #everything()) |> + dplyr::left_join(data.frame(decisionList), + by = dplyr::join_by("name" == "predictor")) |> + dplyr::mutate(decision = + dplyr::case_when(is.na(decision) ~ "Shadow", .default = decision)) |> + dplyr::group_by(name) |> + dplyr::mutate(median_value = median(value, na.rm = TRUE)) + + if (type == 0) { + + # sort Inf values to the left + #impHistory[is.na(impHistory$median_value)] <- -Inf + impHistory$sort_value <- impHistory$median_value + # mv <- min(impHistory$sort_value,na.rm=TRUE) + # impHistory$sort_value[impHistory$decision=="Rejected"]<- (mv-1) + + ggplot2::ggplot(impHistory, + ggplot2::aes( + x = stats::reorder(name, sort_value), + y = value, + color = decision + )) + + ggplot2::geom_boxplot() + + ggplot2::xlab("") + + ggplot2::ylab("Importance") + + ggplot2::scale_color_discrete(name = "Decision") + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + } else if (type == 1) { + ggplot2::ggplot(impHistory, + ggplot2::aes( + x = rnd, + y = value, + group = name, + col = name + )) + + ggplot2::geom_line() + + ggplot2::geom_hline(ggplot2::aes(yintercept = median_value, col = name), lwd = + 2) + + ggplot2::xlab("Round") + + ggplot2::ylab("Importance") + + ggplot2::scale_color_discrete(name = "Predictor") + } else { + stop("Unknown graph type. Please choose 0 or 1.") + } + +} \ No newline at end of file diff --git a/R/toTable.R b/R/toTable.R index 337c235..f7a96ac 100644 --- a/R/toTable.R +++ b/R/toTable.R @@ -48,116 +48,115 @@ treeToTable <- function(tree, colDataList=list(), result=list()) #' Psychology}. #' @export toTable <- function(tree, added.param.cols=NULL, round.param=NULL) { - - -# collect all data -alls <- c() -rowdata <- treeToTable(tree) -for (i in 1:length(rowdata)) { - myrow <- rowdata[[i]][[1]] - for (j in 1:length(myrow)) { - myitem <- myrow[[j]] - alls[length(alls)+1] <- myitem[[1]]$name + + + # collect all data + alls <- c() + rowdata <- treeToTable(tree) + for (i in 1:length(rowdata)) { + myrow <- rowdata[[i]][[1]] + for (j in 1:length(myrow)) { + myitem <- myrow[[j]] + alls[length(alls)+1] <- myitem[[1]]$name + } } -} -alls <- unique(alls) -# collect all variables - - -# create table -#covariate.names <-simplify2array(tree$result$btn.matrix[2,]) -covariate.names <- getCovariatesFromTree(tree) - -# default is to display all parameters -if (is.null(added.param.cols)) { - added.param.cols <- names(tree$params) -} - -# all column names for the table to be generated (covariate names and parameter names) -all.names <- c(covariate.names, added.param.cols) - -str.matrix <- matrix(NA, nrow = length(rowdata),ncol=length(all.names)) - -colnames(str.matrix) <- all.names - -for (i in 1:length(rowdata)) { -# result.row <- rep(" ",length(covariate.names)) - myrow <- rowdata[[i]][[1]] - for (j in 1:length(myrow)) { - myitem <- myrow[[j]] - varid <- which(covariate.names==myitem[[1]]$name) - state <- myitem[[2]] - if (state==1) { # state encodes whether TRUE OR FALSE - if (myitem[[1]]$relation==">=") { - rule <- paste(">=",saferound(myitem[[1]]$value,2)) - } else if (myitem[[1]]$relation=="%in%") { - rule <- paste(myitem[[1]]$value, collapse=" or ") + alls <- unique(alls) + # collect all variables + + + # create table + covariate.names <- getCovariatesFromTree(tree) + + # default is to display all parameters + if (is.null(added.param.cols)) { + added.param.cols <- 
names(tree$params) + } + + # all column names for the table to be generated (covariate names and parameter names) + all.names <- c(covariate.names, added.param.cols) + + str.matrix <- matrix(NA, nrow = length(rowdata),ncol=length(all.names)) + + # convert to a data frame to avoid coercion to string + str.matrix <- data.frame(str.matrix) + + colnames(str.matrix) <- all.names + + for (i in 1:length(rowdata)) { + myrow <- rowdata[[i]][[1]] + for (j in 1:length(myrow)) { + myitem <- myrow[[j]] + varid <- which(covariate.names==myitem[[1]]$name) + state <- myitem[[2]] + if (state==1) { # state encodes whether TRUE OR FALSE + if (myitem[[1]]$relation==">=") { + rule <- paste(">=",saferound(myitem[[1]]$value,2)) + } else if (myitem[[1]]$relation=="%in%") { + rule <- paste(myitem[[1]]$value, collapse=" or ") + } else { + rule <- "UNKNOWN" + } } else { - rule <- "UNKNOWN" - } - } else { - - # invert rule - rule <- "" - if (myitem[[1]]$relation==">=") { - rule <- paste("<",saferound(myitem[[1]]$value,2)) - } else if (myitem[[1]]$relation=="%in%") { - rule <- paste("not (",paste(myitem[[1]]$value,collapse=" or "),")") - } else + + # invert rule + rule <- "" + if (myitem[[1]]$relation==">=") { + rule <- paste("<",saferound(myitem[[1]]$value,2)) + } else if (myitem[[1]]$relation=="%in%") { + rule <- paste("not (",paste(myitem[[1]]$value,collapse=" or "),")") + } else { rule <- "UNKNOWN" } + } + if (is.na(str.matrix[i, varid])) { + str.matrix[i,varid] <- rule + } else { + str.matrix[i,varid] <- paste(str.matrix[i,varid]," & ", rule) + } } - if (is.na(str.matrix[i, varid])) { - str.matrix[i,varid] <- rule - } else { - str.matrix[i,varid] <- paste(str.matrix[i,varid]," & ", rule) + # fill variable names + if (!is.null(added.param.cols)) { + for (j in 1:length(added.param.cols)) { + colid <- which(colnames(str.matrix)==added.param.cols[j]) + param <- rowdata[[i]][[2]][which(names(rowdata[[i]][[2]])==added.param.cols[j])] + if (!is.null(round.param)) { + param <- saferound(param, round.param) + } + str.matrix[i,colid] <- param + } } + } - # fill variable names - if (!is.null(added.param.cols)) { - for (j in 1:length(added.param.cols)) { - colid <- which(colnames(str.matrix)==added.param.cols[j]) - param <- rowdata[[i]][[2]][which(names(rowdata[[i]][[2]])==added.param.cols[j])] - if (!is.null(round.param)) { - param <- saferound(param, round.param) - } - str.matrix[i,colid] <- param + + ## prune empty columns? + is.col.empty <- which(apply(str.matrix,2,function(x){all(is.na(x))})) + if (length(is.col.empty)>0) { + str.matrix <- str.matrix[,-is.col.empty] } + + # sort columns according to number of elements + sortby <- apply(str.matrix,2,function(x){sum(!is.na(x))}) + if (!is.null(added.param.cols)) { + remids <- (dim(str.matrix)[2]-length(added.param.cols)+1):(dim(str.matrix)[2]) + sortby[remids] <- sortby[remids]-999999 # bad style ^^ #TODO } + sort.ix <- sort(sortby,index.return=TRUE,decreasing = TRUE)$ix + str.matrix <- str.matrix[, sort.ix] + + + str.matrix[is.na(str.matrix)]<-"" + + ## format output + #result.string <- paste(paste(colnames(str.matrix),collapse="\t"),"\n") + #for (i in 1:dim(str.matrix)[1]) { + # result.string <- paste(result.string,paste(str.matrix[i,],collapse="\t"),"\n") + #} + + ## and display + #cat(result.string) + + + return(str.matrix) -# result.string <- paste(result.string,paste(result.row,collapse="\t"),"\n") -} - -## prune empty columns? 
-is.col.empty <- which(apply(str.matrix,2,function(x){all(is.na(x))})) -if (length(is.col.empty)>0) { - str.matrix <- str.matrix[,-is.col.empty] -} - -# sort columns according to number of elements -sortby <- apply(str.matrix,2,function(x){sum(!is.na(x))}) -if (!is.null(added.param.cols)) { - remids <- (dim(str.matrix)[2]-length(added.param.cols)+1):(dim(str.matrix)[2]) - sortby[remids] <- sortby[remids]-999999 -} -sort.ix <- sort(sortby,index.return=TRUE,decreasing = TRUE)$ix -str.matrix <- str.matrix[, sort.ix] - - -str.matrix[is.na(str.matrix)]<-"" - -## format output -#result.string <- paste(paste(colnames(str.matrix),collapse="\t"),"\n") -#for (i in 1:dim(str.matrix)[1]) { -# result.string <- paste(result.string,paste(str.matrix[i,],collapse="\t"),"\n") -#} - -## and display -#cat(result.string) - -#require("openxlsx") - -return(str.matrix) - } diff --git a/man/boruta.Rd b/man/boruta.Rd index f0e8b85..8c1fc76 100644 --- a/man/boruta.Rd +++ b/man/boruta.Rd @@ -2,38 +2,62 @@ % Please edit documentation in R/boruta.R \name{boruta} \alias{boruta} -\title{BORUTA algorithm for SEM trees} +\alias{plot.boruta} +\alias{print.boruta} +\title{Run the Boruta algorithm on a sem tree} \usage{ boruta( model, data, control = NULL, predictors = NULL, - percentile_threshold = 100, - rounds = 1, + maxRuns = 30, + pAdjMethod = "none", + alpha = 0.05, + verbose = FALSE, + quant = 1, ... ) } \arguments{ -\item{model}{A template model specification from \code{\link{OpenMx}} using -the \code{\link{mxModel}} function (or a \code{\link[lavaan]{lavaan}} model -using the \code{\link[lavaan]{lavaan}} function with option fit=FALSE). -Model must be syntactically correct within the framework chosen, and -converge to a solution.} +\item{model}{A template SEM. Same as in \code{semtree}.} -\item{data}{Data.frame used in the model creation using -\code{\link{mxModel}} or \code{\link[lavaan]{lavaan}} are input here. Order -of modeled variables and predictors is not important when providing a -dataset to \code{semtree}.} +\item{data}{A dataframe to boruta on. Same as in \code{semtree}.} -\item{control}{\code{\link{semtree}} model specifications from -\code{\link{semtree.control}} are input here. Any changes from the default -setting can be specified here.} +\item{control}{A semforest control object to set forest parameters.} -\item{percentile_threshold}{Numeric.} +\item{predictors}{An optional list of covariates. See semtree code example.} -\item{rounds}{Numeric. Number of rounds of the BORUTA algorithm.} +\item{maxRuns}{Maximum number of boruta search cycles} + +\item{pAdjMethod}{A value from \link{stats::p.adjust.methods} defining a +multiple testing correction method} + +\item{alpha}{p-value cutoff for decisionmaking. Default .05} + +\item{verbose}{Verbosity level for boruta processing +similar to the same argument in \link{semtree.control} and +\link{semforest.control}} + +\item{\dots}{Optional parameters to undefined subfunctions} +} +\value{ +A vim object with several elements that need work. + Of particular note, `$importance` carries mean importance; + `$decision` denotes Accepted/Rejected/Tentative; + `$impHistory` has the entire varimp history; and + `$details` has exit values for each parameter. } \description{ -BORUTA algorithm for SEM trees +Grows a series of SEM Forests following the boruta algorithm to determine + feature importance as moderators of the underlying model. +} +\seealso{ +\code{\link{semtree}} \code{\link{semforest}} +} +\author{ +Priyanka Paul, Timothy R. 
Brick, Andreas Brandmaier
 }
+\keyword{models}
+\keyword{multivariate}
+\keyword{tree}
diff --git a/man/semtree.control.Rd b/man/semtree.control.Rd
index 7ce8847..df9d886 100644
--- a/man/semtree.control.Rd
+++ b/man/semtree.control.Rd
@@ -8,7 +8,7 @@
 \usage{
 semtree.control(
   method = c("naive", "score", "fair", "fair3"),
-  min.N = 20,
+  min.N = NULL,
   max.depth = NA,
   alpha = 0.05,
   alpha.invariance = NA,
@@ -24,7 +24,7 @@ semtree.control(
   report.level = 0,
   exclude.code = NA,
   linear = TRUE,
-  min.bucket = 10,
+  min.bucket = NULL,
   naive.bonferroni.type = 0,
   missing = "ignore",
   use.maxlm = FALSE,
diff --git a/tests/testthat/boruta_one_indicator.R b/tests/testthat/boruta_one_indicator.R
new file mode 100644
index 0000000..cb1e632
--- /dev/null
+++ b/tests/testthat/boruta_one_indicator.R
@@ -0,0 +1,28 @@
+influence <- c(1, 0)
+nPred <- length(influence)
+cutbreaks <- 3
+make_categorical <- TRUE
+# sample size for mxGenerateData (arbitrary choice for this test script)
+N <- 200
+
+
+genModel <- mxModel(type="RAM", manifestVars=c("Y", paste0("X", 1:nPred)),
+                    mxPath(paste0("X", 1:nPred), "Y", values=influence),
+                    mxPath(c("Y", paste0("X", 1:nPred)), arrows=2, values=1))
+simpleData <- mxGenerateData(genModel, N)
+if(make_categorical) {
+  for(i in paste0("X", 1:nPred)) {
+    simpleData[[i]] <- cut(simpleData[[i]], cutbreaks)
+    simpleData[[i]] <- mxFactor(simpleData[[i]], levels(simpleData[[i]]))
+  }
+}
+
+testModel <- mxModel("SimplisticModel", type="RAM", manifestVars="Y",
+                     mxPath("Y", arrows=2, values=1, free=TRUE, labels=c("Var")),
+                     mxPath("one", "Y", values=0, free=TRUE, labels=c("mu")),
+                     mxData(simpleData, type="raw"))
+
+control <- semforest.control(control=semtree.control(method="score", alpha=1))
+
+output <- boruta(testModel, simpleData, verbose=TRUE, control = control)
+output
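+
+# Optional, informal follow-up (not part of the automated checks): inspect the
+# per-predictor decisions and plot the importance history via the plot.boruta
+# method introduced in this patch (type 0 = boxplots, type 1 = per-run traces).
+print(output$details)
+plot(output, type = 0)
+plot(output, type = 1)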