diff --git a/.github/workflows/R-CMD-check-windows.yaml b/.github/workflows/R-CMD-check-windows.yaml new file mode 100644 index 0000000..1d68a31 --- /dev/null +++ b/.github/workflows/R-CMD-check-windows.yaml @@ -0,0 +1,29 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check-Win + +jobs: + R-CMD-check: + runs-on: windows-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 diff --git a/NAMESPACE b/NAMESPACE index 3fef987..0158412 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ S3method(summary,semforest) S3method(summary,semtree) S3method(toLatex,semtree) export(biodiversity) +export(boruta) export(diversityMatrix) export(evaluateTree) export(fitSubmodels) @@ -65,6 +66,7 @@ export(semforest_score_control) export(semtree) export(semtree.constraints) export(semtree.control) +export(semtree_control) export(strip) export(subforest) export(subtree) diff --git a/R/boruta.R b/R/boruta.R index 80ffd06..f3cfcca 100644 --- a/R/boruta.R +++ b/R/boruta.R @@ -1,35 +1,107 @@ +#' BORUTA algorithm for SEM trees +#' +#' This is an experimental feature. Use cautiously. +#' +#' @aliases boruta +#' @param model A template model specification from \code{\link{OpenMx}} using +#' the \code{\link{mxModel}} function (or a \code{\link[lavaan]{lavaan}} model +#' using the \code{\link[lavaan]{lavaan}} function with option fit=FALSE). +#' Model must be syntactically correct within the framework chosen, and +#' converge to a solution. 
+#' @param data Data.frame used in the model creation using +#' \code{\link{mxModel}} or \code{\link[lavaan]{lavaan}} are input here. Order +#' of modeled variables and predictors is not important when providing a +#' dataset to \code{semtree}. +#' @param control \code{\link{semtree}} model specifications from +#' \code{\link{semtree.control}} are input here. Any changes from the default +#' setting can be specified here. +#' @param percentile_threshold Numeric between 0 and 1. Quantile of the shadow-feature importances that a predictor must exceed (1 = maximum). +#' @param rounds Numeric. Number of rounds of the BORUTA algorithm. +#' @param predictors Optional vector of predictor names, passed on to every BORUTA round. +#' @param ... Further arguments passed on to the internal BORUTA run. +#' @return A list with the names of the unimportant predictors (\code{preds_unimportant}) and the number of \code{rounds}. +#' @export boruta <- function(model, data, control = NULL, predictors = NULL, + percentile_threshold = 1, + rounds = 1, ...) { - # TODO: make sure that no column names start with "shadow_" prefix + + # initial checks + stopifnot(percentile_threshold>=0) + stopifnot(percentile_threshold<=1) + stopifnot(is.numeric(rounds)) + stopifnot(rounds>0) + + preds_unimportant <- character(0) + + cur_round <- 1 + temp_vims <- list() + + while (cur_round <= rounds) { + vim_boruta <- .boruta(model=model, + data=data, + control=control, + predictors=predictors, + percentile_threshold = percentile_threshold, + ...) + # add predictors to list of unimportant variables + preds_unimportant <- c(preds_unimportant, names(vim_boruta$filter)[!vim_boruta$filter]) + # remove them from the dataset (select by name; negating a character vector is an error) + data <- data[, !(names(data) %in% preds_unimportant), drop = FALSE] + if (!is.null(predictors)) predictors <- setdiff(predictors, preds_unimportant) + temp_vims[[cur_round]] <- vim_boruta + cur_round <- cur_round + 1 + } + result <- list( + preds_unimportant = preds_unimportant, + rounds = rounds + ) + + return(result) +} + +.boruta <- function(model, + data, + control = NULL, + predictors = NULL, + percentile_threshold = 1, + num_shadows = 1, + ...) 
{ + + # make sure that no column names start with "shadow_" prefix + stopifnot(!any(startsWith(names(data), "shadow_"))) # detect model (warning: duplicated code) if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors) - model.ids <- tmp[[1]] - covariate.ids <- tmp[[2]] - # } else if (inherits(model,"lavaan")){ - # } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) { - # + } else if (inherits(model,"lavaan")){ + + tmp <- getPredictorsLavaan(model, data, predictors) } else { ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") } + model.ids <- tmp[[1]] + covariate.ids <- tmp[[2]] # stage 1 - create shadow features - shadow.ids <- (ncol(data) + 1):(ncol(data) + length(covariate.ids)) + shadow.ids <- ncol(data) + seq_len(length(covariate.ids) * num_shadows) # one id per shadow copy for (cur_cov_id in covariate.ids) { + for (rep_id in seq_len(num_shadows)) { # pick column and shuffle temp_column <- data[, cur_cov_id] temp_column <- sample(temp_column, length(temp_column), replace = FALSE) # add to dataset as shadow feature temp_colname <- paste0("shadow_", names(data)[cur_cov_id], collapse = "") + if (num_shadows>1) temp_colname <- paste0(temp_colname, rep_id, collapse = "") data[temp_colname] <- temp_column if (!is.null(predictors)) predictors <- c(predictors, temp_colname) + } } # run the forest @@ -41,7 +113,11 @@ boruta <- function(model, # get variable importance from shadow features shadow_names <- names(data)[shadow.ids] agvim <- aggregateVarimp(vim, aggregate = "mean") - max_shadow_importance <- max(agvim[names(agvim) %in% shadow_names]) + + vals <- agvim[names(agvim) %in% shadow_names] + #max_shadow_importance <- max(vals) + max_shadow_importance <- quantile(vals, percentile_threshold) + agvim_filtered <- agvim[!(names(agvim) %in% shadow_names)] df <- data.frame(importance = agvim_filtered, predictor = names(agvim_filtered)) @@ -49,6 +125,7 @@ boruta <- function(model, vim$filter <- agvim_filtered 
> max_shadow_importance vim$boruta <- TRUE vim$boruta_threshold <- max_shadow_importance + vim$percentile_threshold <- percentile_threshold return(vim) } diff --git a/R/checkControl.R b/R/checkControl.R index 4bcd8f1..59f9d5d 100644 --- a/R/checkControl.R +++ b/R/checkControl.R @@ -11,6 +11,12 @@ checkControl <- function(control, fail = TRUE) { check.semtree.control <- function(control, fail = TRUE) { attr <- attributes(control)$names def.attr <- attributes(semtree.control())$names + + # add NULL-defaults + null_def <- c("min.N","min.bucket","strucchange.to") + attr <- unique(c(attr, null_def)) + def.attr <- unique(c(def.attr, null_def)) + if ((length(intersect(attr, def.attr)) != length(attr))) { unknown <- setdiff(attr, def.attr) msg <- diff --git a/R/checkModel.R b/R/checkModel.R index bd2ded8..2c11479 100644 --- a/R/checkModel.R +++ b/R/checkModel.R @@ -24,6 +24,3 @@ checkModel <- function(model, control) return(TRUE); } - -#inherits(model1,"lavaan") -#model1@Fit@converged diff --git a/R/growTree.R b/R/growTree.R index e47c4a3..630426b 100644 --- a/R/growTree.R +++ b/R/growTree.R @@ -64,7 +64,40 @@ growTree <- function(model = NULL, mydata = NULL, ui_message("Subsampled predictors: ", paste(node$colnames[meta$covariate.ids])) } } + + # override forced split? + arguments <- list(...) 
+ if ("forced_splits" %in% names(arguments) && !is.null(arguments$forced_splits)) { + forced_splits <- arguments$forced_splits + + # get names of model variables before forcing + model.names <- names(mydata)[meta$model.ids] + covariate.names <- names(mydata)[meta$covariate.ids] + + # select subset with model variables and single, forced predictor + forcedsplit.name <- forced_splits[1] + + if (control$verbose) { + cat("FORCED split: ",forcedsplit.name,"\n") + } + + mydata <- fulldata[, c(model.names, forcedsplit.name) ] + node$colnames <- colnames(mydata) + + # get new model ids after sampling by name + meta$model.ids <- sapply(model.names, function(x) { + which(x == names(mydata)) + }) + names(meta$model.ids) <- NULL + meta$covariate.ids <- unlist(lapply(covariate.names, function(x) { + which(x == names(mydata)) + })) + + } else { + forced_splits <- NULL + } + # determine whether split evaluation can be done on p values node$p.values.valid <- control$method != "cv" @@ -432,6 +465,31 @@ growTree <- function(model = NULL, mydata = NULL, mydata <- fulldata meta <- fullmeta } + + # restore mydata if forced split was true + # and (potentially) force continuation of splitting + if (!is.null(forced_splits)) { + + + # also need to remap col.max to original data! + if (!is.null(result$col.max) && !is.na(result$col.max)) { + col.max.name <- names(mydata)[result$col.max] + result$col.max <- which(names(fulldata) == col.max.name) + } else { + col.max.name <- NULL + } + + mydata <- fulldata + meta <- fullmeta + + # pop first element + forced_splits <- forced_splits[-1] + # set to NULL if no splits left + if (length(forced_splits)==0) forced_splits <- NULL + + # force continuation of splitting ? 
+ cont.split <- TRUE + } if ((!is.null(cont.split)) && (!is.na(cont.split)) && (cont.split)) { if (control$report.level > 10) { @@ -563,8 +621,8 @@ growTree <- function(model = NULL, mydata = NULL, # recursively continue splitting # result1 - RHS; result2 - LHS - result2 <- growTree(node$model, sub2, control, invariance, meta, edgelabel = 0, depth = depth + 1, constraints) - result1 <- growTree(node$model, sub1, control, invariance, meta, edgelabel = 1, depth = depth + 1, constraints) + result2 <- growTree(node$model, sub2, control, invariance, meta, edgelabel = 0, depth = depth + 1, constraints, forced_splits = forced_splits) + result1 <- growTree(node$model, sub1, control, invariance, meta, edgelabel = 1, depth = depth + 1, constraints, forced_splits = forced_splits) # store results in recursive list structure node$left_child <- result2 diff --git a/R/semtree.R b/R/semtree.R index e0ad5d4..1bd434c 100644 --- a/R/semtree.R +++ b/R/semtree.R @@ -140,16 +140,41 @@ semtree <- function(model, data = NULL, control = NULL, constraints = NULL, } } + # here we decide between four cases depending + # on whether min.N is given and/or min.bucket is given # this is a really dumb heuristic # please can someone replace this with something more useful # this based on (Bentler & Chou, 1987; see also Bollen, 1989) + if (is.null(control$min.N)) { - control$min.N <- 5 * npar(model) + + if (is.null(control$min.bucket)) { + # both values were not specified + control$min.N <- max(20, 5 * npar(model)) + control$min.bucket <- max(10, control$min.N / 2) + } else { + # only min.bucket was given, min.N was not specified + control$min.N <- control$min.bucket * 2 + } + } else { + if (is.null(control$min.bucket)) { + # only min.N was given, min.bucket was not specified + control$min.bucket <- max(10, control$min.N / 2) + } else { + # do nothing, both values were specified + if (control$min.bucket > control$min.N) { + warning("Min.bucket parameter should probably be smaller than min.N!") + } + 
} + } + + if (is.null(control$min.N)) { + } # set min.bucket and min.N heuristically if (is.null(control$min.bucket)) { - control$min.bucket <- control$min.N / 2 + } if (control$method == "cv") { diff --git a/R/semtree.control.R b/R/semtree.control.R index 1053412..aac81c0 100644 --- a/R/semtree.control.R +++ b/R/semtree.control.R @@ -100,7 +100,7 @@ #' @export semtree.control <- function(method = c("naive","score","fair","fair3"), - min.N = 20, + min.N = NULL, max.depth = NA, alpha = .05, alpha.invariance = NA, @@ -119,7 +119,7 @@ semtree.control <- # ordinal = 'maxLMo', # and maxLM are available # metric = 'maxLM'), linear = TRUE, - min.bucket = 10, + min.bucket = NULL, naive.bonferroni.type = 0, missing = 'ignore', use.maxlm = FALSE, @@ -200,8 +200,6 @@ semtree.control <- return(options) } - - #' @export semtree_control <- function(...) { semtree.control(...) diff --git a/R/varimp.R b/R/varimp.R index 9e17be2..4d51ed0 100644 --- a/R/varimp.R +++ b/R/varimp.R @@ -115,11 +115,14 @@ varimp <- function(forest, colnames(result$importance.level1) <- var.names } + if (dim(result$importance)[1] == 1) { - #result$importance<-t(result$importance) + result$importance<-t(result$importance) + + # TODO: this is stupid, should be as.matrix?! or something else result$ll.baselines <- - t(t(result$ll.baselines)) # TODO: this is stupid, should be as.matrix?! - } + t(t(result$ll.baselines)) + } colnames(result$importance) <- var.names result$var.names <- var.names diff --git a/docs/404.html b/docs/404.html index b7b4f49..379ce99 100644 --- a/docs/404.html +++ b/docs/404.html @@ -112,7 +112,7 @@
Site built with pkgdown 2.0.7.
+Site built with pkgdown 2.0.6.
diff --git a/docs/CONTRIBUTORS.html b/docs/CONTRIBUTORS.html index b5acfac..4fdcdb5 100644 --- a/docs/CONTRIBUTORS.html +++ b/docs/CONTRIBUTORS.html @@ -88,7 +88,7 @@vignettes/constraints.Rmd
constraints.Rmd
tree.gc <- semtree(model.cfa, data=cfa.sim, constraints=
semtree.constraints(global.invariance =
c("F__x1","F__x2","F__x3","F__x4")))
-#> > Model was not run. Estimating parameters now.
+#> ❯ Model was not run. Estimating parameters now.
#>
Beginning initial fit attempt
-Fit attempt 0, fit=1259.86964139476, new current best! (was 23573.1076511312)
+Fit attempt 0, fit=1245.04605304561, new current best! (was 23512.9380282892)
-> Global Constraints:
+❯ Global Constraints:
#> F__x1 F__x2 F__x3 F__x4
-#> > Freely Estimated Parameters:
+#> ❯ Freely Estimated Parameters:
#> VAR_x1 VAR_x2 VAR_x3 VAR_x4 const__x2 const__x3 const__x4 const__F
#>
Beginning initial fit attempt
-
-
-Beginning initial fit attempt
-Fit attempt 0, fit=626.987092715913, new current best! (was 868.957879718916)
-
-
-Beginning initial fit attempt
-Fit attempt 0, fit=21.8133858849526, new current best! (was 297.189240590628)
-Beginning fit attempt 1 of at maximum 10 extra tries
-Fit attempt 1, fit=21.8133858848553, new current best! (was 21.8133858849526)
-Beginning fit attempt 2 of at maximum 10 extra tries
-Beginning fit attempt 3 of at maximum 10 extra tries
-Beginning fit attempt 4 of at maximum 10 extra tries
-Beginning fit attempt 5 of at maximum 10 extra tries
-Beginning fit attempt 6 of at maximum 10 extra tries
-Beginning fit attempt 7 of at maximum 10 extra tries
-Beginning fit attempt 8 of at maximum 10 extra tries
-Beginning fit attempt 9 of at maximum 10 extra tries
-Beginning fit attempt 10 of at maximum 10 extra tries
-
-
-Beginning initial fit attempt
-Fit attempt 0, fit=20.4870677058905, new current best! (was 329.797852125334)
-Beginning fit attempt 1 of at maximum 10 extra tries
-Beginning fit attempt 2 of at maximum 10 extra tries
-Beginning fit attempt 3 of at maximum 10 extra tries
-Beginning fit attempt 4 of at maximum 10 extra tries
-Beginning fit attempt 5 of at maximum 10 extra tries
-Beginning fit attempt 6 of at maximum 10 extra tries
-Beginning fit attempt 7 of at maximum 10 extra tries
-Beginning fit attempt 8 of at maximum 10 extra tries
-Beginning fit attempt 9 of at maximum 10 extra tries
-Beginning fit attempt 10 of at maximum 10 extra tries
-
-
-Beginning initial fit attempt
-Fit attempt 0, fit=140.391476915093, new current best! (was 390.911761675748)
-Beginning fit attempt 1 of at maximum 10 extra tries
-Fit attempt 1, fit=140.391476914989, new current best! (was 140.391476915093)
-Beginning fit attempt 2 of at maximum 10 extra tries
-Fit attempt 2, fit=140.391476914986, new current best! (was 140.391476914989)
-Beginning fit attempt 3 of at maximum 10 extra tries
-Beginning fit attempt 4 of at maximum 10 extra tries
-Beginning fit attempt 5 of at maximum 10 extra tries
-Beginning fit attempt 6 of at maximum 10 extra tries
-Beginning fit attempt 7 of at maximum 10 extra tries
-Beginning fit attempt 8 of at maximum 10 extra tries
-Beginning fit attempt 9 of at maximum 10 extra tries
-Beginning fit attempt 10 of at maximum 10 extra tries
-
-
[32m✔
[39m Tree construction finished [took 6s].
+Fit attempt 0, fit=1245.0460530455, new current best! (was 1245.04605304558)
+
+
+Beginning initial fit attempt
+Fit attempt 0, fit=700.981679486895, new current best! (was 935.197549062569)
+
+
+Beginning initial fit attempt
+Fit attempt 0, fit=82.3725219274684, new current best! (was 404.027616923408)
+Beginning fit attempt 1 of at maximum 10 extra tries
+Fit attempt 1, fit=82.3725219274052, new current best! (was 82.3725219274684)
+
+
+Beginning initial fit attempt
+Fit attempt 0, fit=-12.8178155443534, new current best! (was 296.954062563265)
+Beginning fit attempt 1 of at maximum 10 extra tries
+Fit attempt 1, fit=-12.8178155443725, new current best! (was -12.8178155443534)
+Beginning fit attempt 2 of at maximum 10 extra tries
+Beginning fit attempt 3 of at maximum 10 extra tries
+Beginning fit attempt 4 of at maximum 10 extra tries
+Beginning fit attempt 5 of at maximum 10 extra tries
+Beginning fit attempt 6 of at maximum 10 extra tries
+Beginning fit attempt 7 of at maximum 10 extra tries
+Beginning fit attempt 8 of at maximum 10 extra tries
+Beginning fit attempt 9 of at maximum 10 extra tries
+Beginning fit attempt 10 of at maximum 10 extra tries
+
+
+Beginning initial fit attempt
+Fit attempt 0, fit=41.8996556241436, new current best! (was 309.848503983403)
+Beginning fit attempt 1 of at maximum 10 extra tries
+Fit attempt 1, fit=41.8996556240704, new current best! (was 41.8996556241436)
+Beginning fit attempt 2 of at maximum 10 extra tries
+Fit attempt 2, fit=41.8996556240695, new current best! (was 41.8996556240704)
+Beginning fit attempt 3 of at maximum 10 extra tries
+Beginning fit attempt 4 of at maximum 10 extra tries
+Beginning fit attempt 5 of at maximum 10 extra tries
+Beginning fit attempt 6 of at maximum 10 extra tries
+Beginning fit attempt 7 of at maximum 10 extra tries
+Beginning fit attempt 8 of at maximum 10 extra tries
+Beginning fit attempt 9 of at maximum 10 extra tries
+Beginning fit attempt 10 of at maximum 10 extra tries
+
+
[32m✔
[39m Tree construction finished [took less than a second].
plot(tree.gc)
tree.lc <- semtree(model.cfa, data=cfa.sim, constraints=
semtree.constraints(
local.invariance= c("F__x1","F__x2","F__x3","F__x4")))
-#> > Model was not run. Estimating parameters now.
+#> ❯ Model was not run. Estimating parameters now.
#>
Beginning initial fit attempt
-Fit attempt 0, fit=1259.86964139476, new current best! (was 23573.1076511312)
+Fit attempt 0, fit=1245.04605304561, new current best! (was 23512.9380282892)
-> No Invariance alpha selected. alpha.invariance set to:0.05
+❯ No Invariance alpha selected. alpha.invariance set to:0.05
#>
Beginning initial fit attempt
-
-
-Beginning initial fit attempt
-Fit attempt 0, fit=594.224449688255, new current best! (was 868.957879718916)
-
-
-Beginning initial fit attempt
-Fit attempt 0, fit=118.828548931502, new current best! (was 390.911761675748)
-Beginning fit attempt 1 of at maximum 10 extra tries
-Fit attempt 1, fit=118.828548931421, new current best! (was 118.828548931502)
-Beginning fit attempt 2 of at maximum 10 extra tries
-Beginning fit attempt 3 of at maximum 10 extra tries
-Beginning fit attempt 4 of at maximum 10 extra tries
-Beginning fit attempt 5 of at maximum 10 extra tries
-Beginning fit attempt 6 of at maximum 10 extra tries
-Beginning fit attempt 7 of at maximum 10 extra tries
-Beginning fit attempt 8 of at maximum 10 extra tries
-Beginning fit attempt 9 of at maximum 10 extra tries
-Beginning fit attempt 10 of at maximum 10 extra tries
-
-
[32m✔
[39m Tree construction finished [took 5s].
+Fit attempt 0, fit=1245.04605304549, new current best! (was 1245.04605304561)
+
+
+Beginning initial fit attempt
+Fit attempt 0, fit=674.376601044165, new current best! (was 935.197546229974)
+
+
+Beginning initial fit attempt
+Fit attempt 0, fit=14.8502289550343, new current best! (was 309.84850681597)
+Beginning fit attempt 1 of at maximum 10 extra tries
+Fit attempt 1, fit=14.8502289550283, new current best! (was 14.8502289550343)
+Beginning fit attempt 2 of at maximum 10 extra tries
+Beginning fit attempt 3 of at maximum 10 extra tries
+Beginning fit attempt 4 of at maximum 10 extra tries
+Beginning fit attempt 5 of at maximum 10 extra tries
+Beginning fit attempt 6 of at maximum 10 extra tries
+Beginning fit attempt 7 of at maximum 10 extra tries
+Beginning fit attempt 8 of at maximum 10 extra tries
+Beginning fit attempt 9 of at maximum 10 extra tries
+Beginning fit attempt 10 of at maximum 10 extra tries
+
+
[32m✔
[39m Tree construction finished [took 1s].
Now we find p1
as the only predictor that yields
subgroups that pass the measurement invariance test. Even though we have
chosen the four factor loadings as local.invariance
@@ -379,11 +373,11 @@
Now, we grow a tree without constraints:
tree.biv <- semtree(model.biv, data=df.biv)
-#> > Model was not run. Estimating parameters now.
+#> ❯ Model was not run. Estimating parameters now.
#>
Beginning initial fit attempt
Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
@@ -420,29 +414,29 @@ Focus Parameters
Beginning initial fit attempt
-Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531949563)
+Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531930229)
Beginning initial fit attempt
-Fit attempt 0, fit=1555.54412300078, new current best! (was 1720.05414322814)
+Fit attempt 0, fit=1555.54412300078, new current best! (was 1720.05414323192)
Beginning initial fit attempt
-Fit attempt 0, fit=1569.26472590267, new current best! (was 1734.07020313343)
+Fit attempt 0, fit=1569.26472590267, new current best! (was 1734.07020312965)
Beginning initial fit attempt
-Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.0405063558)
-
+Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.04050654914)
+
Beginning initial fit attempt
-Fit attempt 0, fit=1593.91684303245, new current best! (was 1780.60715330577)
+Fit attempt 0, fit=1593.91684303245, new current best! (was 1780.60715331027)
Beginning initial fit attempt
-Fit attempt 0, fit=1576.27862642528, new current best! (was 1785.96205470403)
+Fit attempt 0, fit=1576.27862642528, new current best! (was 1785.96205469953)
-
[32m✔
[39m Tree construction finished [took 3s].
As expected, we obtain a tree structure that has both p1
and p2
(here we use the viridis colors to give each leaf
node a different frame color, which we’ll use later again):
tree.biv2 <- semtree(model.biv, df.biv, constraints=
semtree.constraints(focus.parameters = "mu1"))
-#> > Model was not run. Estimating parameters now.
+#> ❯ Model was not run. Estimating parameters now.
#>
Beginning initial fit attempt
Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
@@ -493,13 +487,13 @@ Focus Parameters
Beginning initial fit attempt
-Fit attempt 0, fit=3740.92185296731, new current best! (was 4086.36288876948)
+Fit attempt 0, fit=3740.92185296731, new current best! (was 4086.36288893237)
Beginning initial fit attempt
-Fit attempt 0, fit=3795.16307144921, new current best! (was 4147.56293708195)
+Fit attempt 0, fit=3795.16307144921, new current best! (was 4147.56293691906)
-
[32m✔
[39m Tree construction finished [took 2s].
+
[32m✔
[39m Tree construction finished [took less than a second].
plot(tree.biv2)
tree.biv3 <- semtree(model.biv, df.biv, constraints=
semtree.constraints(focus.parameters = "mu2"))
-#> > Model was not run. Estimating parameters now.
+#> ❯ Model was not run. Estimating parameters now.
#>
Beginning initial fit attempt
Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
@@ -522,13 +516,13 @@ Focus Parameters
Beginning initial fit attempt
-Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531949563)
+Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531930229)
Beginning initial fit attempt
-Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.0405063558)
-
-
[32m✔
[39m Tree construction finished [took 2s].
+Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.04050654914)
+
+
[32m✔
[39m Tree construction finished [took less than a second].
And, indeed, we see only grp2
as predictor whereas
grp1
was not selected this time.
@@ -541,7 +535,7 @@Focus Parameters
tree.biv4 <- semtree(model.biv, df.biv, constraints= semtree.constraints(focus.parameters = "VAR_x2")) -#> > Model was not run. Estimating parameters now. +#> ❯ Model was not run. Estimating parameters now. #> Beginning initial fit attempt Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595) @@ -550,7 +544,7 @@
Focus ParametersBeginning initial fit attempt Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158) - [32m✔ [39m Tree construction finished [took 1s]. + [32m✔ [39m Tree construction finished [took less than a second]. plot(tree.biv4)
Site built with pkgdown 2.0.7.
+Site built with pkgdown 2.0.6.
diff --git a/docs/articles/constraints_files/figure-html/plbv2-1.png b/docs/articles/constraints_files/figure-html/plbv2-1.png index 9f97b31..2441608 100644 Binary files a/docs/articles/constraints_files/figure-html/plbv2-1.png and b/docs/articles/constraints_files/figure-html/plbv2-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png index aaf7f14..ad1756f 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png index 9b4d438..e3d44d2 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png index 10cdbd1..7585b2c 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png index 26296be..88d33d5 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png index fea4170..41ba7dc 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png 
differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png index ecf3969..66505ec 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png index 3e1eb5f..2a78766 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/forests.html b/docs/articles/forests.html index c698c65..a3063da 100644 --- a/docs/articles/forests.html +++ b/docs/articles/forests.html @@ -94,7 +94,7 @@vignettes/forests.Rmd
forests.Rmd
-control <- semforest.control(num.trees = 5)
+control <- semforest_control(num.trees = 5)
print(control)
#> SEM-Forest control:
#> -----------------
@@ -413,8 +413,10 @@ Variable importancevim <- varimp(forest)
print(vim, sort.values=TRUE)
#> Variable Importance
-#> Study PA2 Film NA2 state1 TA2
-#> 0.00000 28.06156 35.63105 38.45805 42.92733 45.01637
+#> Study PA2 state1 TA2 Film
+#> -9.659311e-08 9.418006e+00 1.218599e+01 2.066494e+01 2.537268e+01
+#> NA2
+#> 3.658740e+01
plot(vim)
From this, we can learn that variables such as NA2
@@ -443,7 +445,7 @@
Site built with pkgdown 2.0.7.
+Site built with pkgdown 2.0.6.
diff --git a/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png index 13f899b..8d75b91 100644 Binary files a/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/getting-started.html b/docs/articles/getting-started.html index 488ced1..b2b6bf1 100644 --- a/docs/articles/getting-started.html +++ b/docs/articles/getting-started.html @@ -108,6 +108,7 @@
library(semtree)
#> Loading required package: OpenMx
+#> OpenMx may run faster if it is compiled to take advantage of multiple cores.
library(OpenMx)
Site built with pkgdown 2.0.7.
+Site built with pkgdown 2.0.6.
vignettes/score-based-tests.Rmd
score-based-tests.Rmd
DeltaPA
.
library(OpenMx)
+#> OpenMx may run faster if it is compiled to take advantage of multiple cores.
manifests<-c("DeltaPA")
latents<-c()
model <- mxModel("Simple Model",
@@ -257,8 +258,8 @@ Create simple model of state anxie
#> RMSEA: 0 [95% CI (NA, NA)]
#> Prob(RMSEA <= 0.05): NA
#> To get additional fit indices, see help(mxRefModels)
-#> timestamp: 2024-03-25 12:06:18
-#> Wall clock time: 0.112819 secs
+#> timestamp: 2024-04-15 22:33:34
+#> Wall clock time: 0.02639604 secs
#> optimizer: SLSQP
#> OpenMx version number: 2.21.1
#> Need help? See help(mxSummary)
Site built with pkgdown 2.0.7.
+Site built with pkgdown 2.0.6.
diff --git a/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-6-1.png index 2215fd0..467c4b7 100644 Binary files a/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-7-1.png index 281ee5e..bdc024d 100644 Binary files a/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-8-1.png index e9f8fa0..1064335 100644 Binary files a/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-8-1.png and b/docs/articles/score-based-tests_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/articles/semforest-focus.html b/docs/articles/semforest-focus.html index 9e2203a..3defd3c 100644 --- a/docs/articles/semforest-focus.html +++ b/docs/articles/semforest-focus.html @@ -94,7 +94,7 @@vignettes/semforest-focus.Rmd
semforest-focus.Rmd
library(semtree)
#> Loading required package: OpenMx
+#> OpenMx may run faster if it is compiled to take advantage of multiple cores.
set.seed(123)
N <- 1000
grp1 <- factor(sample(x = c(0,1), size=N, replace=TRUE))
@@ -154,11 +155,11 @@ 2024-03-25
#>
#> free parameters:
#> name matrix row col Estimate Std.Error A
-#> 1 VAR_x1 S x1 x1 4.0583666 0.18149512
-#> 2 COV_x1_x2 S x1 x2 0.1970528 0.11385399
-#> 3 VAR_x2 S x2 x2 3.1848900 0.14243121
-#> 4 mu1 M 1 x1 1.4858354 0.06370452
-#> 5 mu2 M 1 x2 1.4551364 0.05643442
+#> 1 VAR_x1 S x1 x1 4.0583666 0.18149573
+#> 2 COV_x1_x2 S x1 x2 0.1970528 0.11384948
+#> 3 VAR_x2 S x2 x2 3.1848900 0.14243161
+#> 4 mu1 M 1 x1 1.4858354 0.06370544
+#> 5 mu2 M 1 x2 1.4551364 0.05643474
#>
#> Model Statistics:
#> | Parameters | Degrees of Freedom | Fit (-2lnL units)
@@ -176,8 +177,8 @@ 2024-03-25
#> RMSEA: 0 [95% CI (NA, NA)]
#> Prob(RMSEA <= 0.05): NA
#> To get additional fit indices, see help(mxRefModels)
-#> timestamp: 2024-03-25 12:06:44
-#> Wall clock time: 0.123879 secs
+#> timestamp: 2024-04-15 22:33:42
+#> Wall clock time: 0.02595901 secs
#> optimizer: SLSQP
#> OpenMx version number: 2.21.1
#> Need help? See help(mxSummary)
@@ -240,7 +241,7 @@ To install the latest semtree package directly from GitHub, copy the following line into R:
library(devtools)
-devtools::install_github("semtree/brandmaier")
+devtools::install_github("brandmaier/semtree")
# even better: install with package vignette (extra documentation)
devtools::install_github("brandmaier/semtree",force=TRUE, build_opts = c())
Package documentation and use-cases with runnable R code can be found on our github pages: https://brandmaier.github.io/semtree/.
-You may also want to visit the semtree website: https://brandmaier.de/semtree
Package vignettes (shipped with the package) contain documentation on how to use the package. Simply type this in R once you have loaded the package:
@@ -208,7 +207,7 @@semforest_control()
over semforest.control()
and semtree_control()
over semtree.control()
+mtry
in forests (if NULL
) and for choosing min.N
and min.bucket
(if NULL
)ctsemOMX
to suggested packagevarimp
, such that na.omit=TRUE, which is consistent with other packages like party or partykittoTable()
-command, by default, all parameters are shown now, also fixed a bug with score-based tests and toTable()BORUTA algorithm for SEM trees
+boruta(
+ model,
+ data,
+ control = NULL,
+ predictors = NULL,
+ percentile_threshold = 1,
+ rounds = 1,
+ ...
+)
A template model specification from OpenMx
using
+the mxModel
function (or a lavaan
model
+using the lavaan
function with option fit=FALSE).
+Model must be syntactically correct within the framework chosen, and
+converge to a solution.
Data.frame used in the model creation using
+mxModel
or lavaan
are input here. Order
+of modeled variables and predictors is not important when providing a
+dataset to semtree
.
semtree
model specifications from
+semtree.control
are input here. Any changes from the default
+setting can be specified here.
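Numeric between 0 and 1. Quantile of the shadow-feature importances that a predictor's importance must exceed to be kept (1 = maximum shadow importance).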
Numeric. Number of rounds of the BORUTA algorithm.
numeric from interval (0, 1) specifying start of trimmed -sample period. With the default +sample period. With the default from = 0.15 the first and last 15 percent of observations are trimmed. This is only needed for continuous covariates.
numeric. Number of replications used for simulating from the asymptotic +
numeric. Number of replications used for simulating from the asymptotic distribution (passed to efpFunctional). Only needed for ordinal covariates.
biodiversity()
Quantify bio diversity of a SEM Forest
BORUTA algorithm for SEM trees
se()
SEMtrees Parameter Estimates Standard Error Table
SEM Forest Control Object
Create a SEM Forest
SEM Forest Control Object
SEM Tree Package
SEM Tree: Recursive Partitioning for Structural Equation Models
semtree.control()
SEM Tree Control Object
SEM Tree: Recursive Partitioning for Structural Equation Models
Integer. If mc
is not NULL
, the function will sample
-mc
number of rows from data
with replacement, to estimate
+mc
number of rows from data
with replacement, to estimate
marginal dependency using Monte Carlo integration. This is less
computationally expensive.
Integer. If mc
is not NULL
, the function will sample
-mc
number of rows from data
with replacement, to estimate
+mc
number of rows from data
with replacement, to estimate
marginal dependency using Monte Carlo integration. This is less
computationally expensive.
semtree.control(
- method = "naive",
+ method = c("naive", "score", "fair", "fair3"),
min.N = 20,
max.depth = NA,
alpha = 0.05,
@@ -334,7 +334,7 @@ Examples