diff --git a/.Rbuildignore b/.Rbuildignore index 3202962..d93aac8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -13,3 +13,7 @@ Makefile ^pkgdown$ ^LICENSE\.md$ ^\.travis\.yml$ +misc +cran-comments.md +^CRAN-SUBMISSION$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check-windows.yaml b/.github/workflows/R-CMD-check-windows.yaml new file mode 100644 index 0000000..1d68a31 --- /dev/null +++ b/.github/workflows/R-CMD-check-windows.yaml @@ -0,0 +1,29 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check-Win + +jobs: + R-CMD-check: + runs-on: windows-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..f4b17a4 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,29 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/R-devel-check.yml b/.github/workflows/R-devel-check.yml new file mode 100644 index 0000000..ca17a96 --- /dev/null +++ b/.github/workflows/R-devel-check.yml @@ -0,0 +1,17 @@ +# check package on latest R devel version +# +on: workflow-dispatch + +name: R-CMD-check + +jobs: + devel-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + with: + r-version: 'devel' + # Use "renv" to retrieve R version recorded in renv.lock file. + - run: 'R CMD build ./' + - run: 'R CMD check semtree*.tar.gz' diff --git a/.github/workflows/R-tests.yml b/.github/workflows/R-tests.yml new file mode 100644 index 0000000..e4b95ea --- /dev/null +++ b/.github/workflows/R-tests.yml @@ -0,0 +1,31 @@ +on: + workflow_dispatch + +name: Tests + +jobs: + document: + name: run tests + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache-version: 2 + extra-packages: | + any::testthat + any::devtools +# needs: pr-document + - name: install package + run: devtools::install_github("brandmaier/semtree") + shell: Rscript {0} + + - name: run test + run: testthat::test_dir("tests/testthat/") + shell: Rscript {0} diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml new file mode 100644 index 0000000..2c5bb50 --- /dev/null +++ b/.github/workflows/test-coverage.yml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage + + - name: Test coverage + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 7836fb7..4c4c5fb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,9 @@ inst/doc doc Meta +/doc/ +/Meta/ +cran_comments.md +CRAN* .DS_Store + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d71d611..0000000 --- a/.travis.yml +++ /dev/null @@ -1,46 +0,0 @@ -# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r - -language: R -cache: packages -dist: focal -r: - - release - - devel -r_binary_packages: - - bitops - - sets - - digest - - rpart - - rpart.plot - - plotrix - - cluster - - stringr - - lavaan - - expm - - lavaan - - tidyr - - viridis - - ggplot2 - - strucchange - - sandwich - - zoo - - crayon - - testthat - - Rcpp - - RcppEigen - - MASS - - Matrix - - StanHeaders - - mvtnorm - - xfun - - mime - - RcppParallel - - rmarkdown - - htmltools - - knitr - - rpf - - BH - - psych - - openmx - - \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 3486b46..2c97d4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,15 +11,10 @@ Depends: R (>= 2.10), OpenMx (>= 2.6.9), Imports: - bitops, - sets, - digest, rpart, rpart.plot (>= 3.0.6), - plotrix, - cluster, - stringr, lavaan, + cluster, ggplot2, tidyr, methods, @@ -30,7 +25,6 @@ Imports: clisymbols, future.apply, data.table, - ctsemOMX, expm, gridBase Suggests: @@ -39,7 +33,9 @@ Suggests: viridis, MASS, psychTools, - testthat + testthat, + future, + ctsemOMX Description: SEM Trees and SEM Forests -- an extension of model-based decision trees and forests to Structural Equation Models (SEM). SEM trees hierarchically split empirical data into homogeneous groups each sharing similar data patterns @@ -53,9 +49,10 @@ Description: SEM Trees and SEM Forests -- an extension of model-based decision License: GPL-3 Encoding: UTF-8 LazyLoad: yes -Version: 0.9.19 -Date: 2023-03-03 -RoxygenNote: 7.3.1 +Version: 0.9.20 +Date: 2024-03-25 +RoxygenNote: 7.2.3 VignetteBuilder: knitr BugReports: https://github.com/brandmaier/semtree/issues URL: https://github.com/brandmaier/semtree +Language: en-US diff --git a/NAMESPACE b/NAMESPACE index db0d57e..0158412 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,11 +26,13 @@ S3method(print,summary.semtree) S3method(proximity,semforest) S3method(proximity,semforest_node_id) S3method(proximity,semforest_stripped) +S3method(prune,semforest) S3method(prune,semtree) S3method(strip,semforest) S3method(strip,semtree) S3method(summary,semforest) S3method(summary,semtree) +S3method(toLatex,semtree) export(biodiversity) export(boruta) export(diversityMatrix) @@ -59,10 +61,12 @@ export(prune) export(se) export(semforest) export(semforest.control) +export(semforest_control) export(semforest_score_control) export(semtree) export(semtree.constraints) export(semtree.control) +export(semtree_control) export(strip) export(subforest) export(subtree) @@ -72,7 +76,6 @@ export(varimpConvergencePlot) import(OpenMx) import(data.table) import(rpart) -importFrom(bitops,bitAnd) importFrom(data.table,data.table) importFrom(grDevices,heat.colors) importFrom(graphics,barplot) @@ -92,7 +95,6 @@ importFrom(methods,is) importFrom(parallel,clusterMap) importFrom(parallel,parLapply) importFrom(sandwich,bread) -importFrom(sets,as.set) importFrom(stats,as.dist) importFrom(stats,as.formula) importFrom(stats,cmdscale) diff --git a/NEWS.md b/NEWS.md index 0a11373..54cfdae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,12 +1,20 @@ -# semtree 0.9.20 (2023) +# semtree 0.9.20 (2024) -- changed default behavior of print function of varimp, such that na.omit=TRUE, - which is consistent with other packages like party or partykit +- added an error handler for score-based tests when the vcov matrix cannot be computed (e.g., models with Heywood cases) +- leaner package imports: removed dependency on bitops and stringr package +- prefer `semforest_control()` over `semforest.control()` and `semtree_control()` over `semtree.control()` +- added heuristics for choosing `mtry` in forests (if `NULL`) and for choosing `min.N` and `min.bucket` (if `NULL`) +- moved dependency on `ctsemOMX` to suggested package -# semtree 0.9.19 (2022) +# semtree 0.9.19 (2023) +- changed default behavior of print function of `varimp`, such that na.omit=TRUE, which is consistent with other packages like party or partykit +- fixed issues with `toTable()`-command, by default, all parameters are shown now, also fixed a bug with score-based tests and toTable() +- fixed problem with focus-parameters and variable importance - bugfix in score-based tests that sometimes did not respect min.N constraints - new functionality for parameter contribution evaluation +- more verbose vignettes +- removed dependency on set, plotrix and digest package to make package imports leaner # semtree 0.9.18 (2022) @@ -65,6 +73,6 @@ - deprecated partialDependencePlot and introduced partialDependence() function with S3 plotting method - added parallel computation option to partialDependence - added new demo scripts -- added extra.legend paramter to varimpConvergencePlot +- added extra.legend parameter to varimpConvergencePlot - bugfix in traverse() that led to underestimations of variable importance in some cases - added error message when trying to use lavaan and global constraints diff --git a/R/OpenMx_scores_input.R b/R/OpenMx_scores_input.R index 48aa1c9..7a28fdc 100644 --- a/R/OpenMx_scores_input.R +++ b/R/OpenMx_scores_input.R @@ -5,6 +5,25 @@ OpenMx_scores_input <- function(x, control) { p_star <- p * (p + 1) / 2 p_star_means <- p * (p + 3) / 2 + # AB: give pseudo-labels to matrices if + # unlabelled parameters are given + candidate_param_id <- which(startsWith(x=names(x$output$estimate), prefix=x$name)) + if (length(candidate_param_id)>0) { + for (k in candidate_param_id) { + candidate_param_name <- names(x$output$estimate)[k] + cplen <- nchar(x$name) + candidate_matrix <- substr(candidate_param_name, cplen+2,cplen+2) + candidate_pos <- as.integer(strsplit(substr(candidate_param_name, cplen+4, nchar(candidate_param_name)-1),",")[[1]]) + if (candidate_matrix=="A") { + x$A$labels[candidate_pos[1], candidate_pos[2]]<-candidate_param_name + } else if (candidate_matrix=="S") { + x$S$labels[candidate_pos[1], candidate_pos[2]]<-candidate_param_name + } else if (candidate_matrix == "M") { + x$M$labels[candidate_pos]<-candidate_param_name + } + } + } + if (control$linear | imxHasDefinitionVariable(x)) { param_names <- names(x$output$estimate) diff --git a/R/aggregateVarimp.R b/R/aggregateVarimp.R index b63969e..9a409de 100644 --- a/R/aggregateVarimp.R +++ b/R/aggregateVarimp.R @@ -1,4 +1,3 @@ - aggregateVarimp <- function(vimp, aggregate = "mean", diff --git a/R/bootstrap.R b/R/bootstrap.R index 773d087..b1e7b5b 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -5,15 +5,15 @@ forest.sample <- return.oob = F, type = "bootstrap") { if (mtry > 0) { - #cov.ids <- which(names(dataset) %in% covariates) + # cov.ids <- which(names(dataset) %in% covariates) # sample length(cov.ids)-mtry of those to exclude - #rem.ids <- sample(cov.ids, length(cov.ids)-mtry) - #dataset <- dataset[, -rem.ids] + # rem.ids <- sample(cov.ids, length(cov.ids)-mtry) + # dataset <- dataset[, -rem.ids] dataset <- sampleColumns(dataset, covariates, mtry) } - + N <- dim(dataset)[1] - + if (type == "bootstrap") { indices <- sample(1:N, N, replace = T) } else if (type == "subsample") { @@ -23,12 +23,11 @@ forest.sample <- } bootstrap.data <- dataset[indices, ] oob.indices <- setdiff(1:N, unique(indices)) - oob.data <- dataset[oob.indices,] + oob.data <- dataset[oob.indices, ] if (return.oob) { return(list(bootstrap.data = bootstrap.data, oob.data = oob.data)) } else { return(bootstrap.data) } - - } \ No newline at end of file + } diff --git a/R/boruta.R b/R/boruta.R index cfe0ff5..0ba714b 100644 --- a/R/boruta.R +++ b/R/boruta.R @@ -1,4 +1,3 @@ - #' Run the Boruta algorithm on a sem tree #' #' Grows a series of SEM Forests following the boruta algorithm to determine @@ -28,6 +27,7 @@ #' #' @keywords tree models multivariate #' @export + boruta <- function(model, data, control = NULL, @@ -38,22 +38,63 @@ boruta <- function(model, verbose=FALSE, ...) { - - # TODO: make sure that no column names start with "shadow_" prefix + # initial checks + stopifnot(percentile_threshold>=0) + stopifnot(percentile_threshold<=1) + stopifnot(is.numeric(rounds)) + stopifnot(rounds>0) + + preds_important <- c() + preds_unimportant <- c() + + cur_round = 1 + temp_vims <- list() + + while(cur_round <= rounds) { + vim_boruta <- .boruta(model=model, + data=data, + control=control, + predictors=predictors, + percentile_threshold = percentile_threshold, + ...) + browser() + # add predictors to list of unimportant variables + preds_unimportant <- c(preds_unimportant, names(vim_boruta$filter)[!vim_boruta$filter]) + # remove them from the dataset + data <- data[, -c(preds_unimportant)] + temp_vims[[cur_round]] <-vim_boruta + } + result <- list( + preds_unimportant, + rounds = rounds + ) + + return(result) +} + +.boruta <- function(model, + data, + control = NULL, + predictors = NULL, + percentile_threshold = 1, + num_shadows = 1, + ...) { + + # make sure that no column names start with "shadow_" prefix + stopifnot(all(sapply(names(data), function(x) {!startsWith(x, "shadow_")}))) + # detect model (warning: duplicated code) - if (inherits(model,"MxModel") || inherits(model,"MxRAMModel")) { - tmp <- getPredictorsOpenMx(mxmodel=model, dataset=data, covariates=predictors) - model.ids <- tmp[[1]] - covariate.ids <- tmp[[2]] -# } else if (inherits(model,"lavaan")){ + if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { + tmp <- getPredictorsOpenMx(mxmodel = model, dataset = data, covariates = predictors) - # } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) { -# + } else if (inherits(model,"lavaan")){ + + tmp <- getPredictorsLavaan(model, data, predictors) } else { - ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!"); + ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") } - + # Checks on x & y from the boruta package if(length(grep('^shadow',covariate.ids)>0)) stop('Attributes with names starting from "shadow" are reserved for internal use. Please rename them.') diff --git a/R/checkControl.R b/R/checkControl.R index f098454..59f9d5d 100644 --- a/R/checkControl.R +++ b/R/checkControl.R @@ -8,12 +8,16 @@ checkControl <- function(control, fail = TRUE) { return(fail) } -check.semtree.control <- function(control, fail = TRUE) -{ +check.semtree.control <- function(control, fail = TRUE) { attr <- attributes(control)$names def.attr <- attributes(semtree.control())$names - if ((length(intersect(attr, def.attr)) != length(attr))) - { + + # add NULL-defaults + null_def <- c("min.N","min.bucket","strucchange.to") + attr <- unique(c(attr, null_def)) + def.attr <- unique(c(def.attr, null_def)) + + if ((length(intersect(attr, def.attr)) != length(attr))) { unknown <- setdiff(attr, def.attr) msg <- paste("Control object contains unknown parameters:", unknown) @@ -22,9 +26,8 @@ check.semtree.control <- function(control, fail = TRUE) stop() } else { ui_warn(msg) - + return(FALSE) - } } else { temp <- semtree.control() @@ -36,23 +39,20 @@ check.semtree.control <- function(control, fail = TRUE) } } } # end for - - return (TRUE) + + return(TRUE) } - - - - return (length(intersect(attr, def.attr)) == length(attr)) - + + + + return(length(intersect(attr, def.attr)) == length(attr)) } -check.semforest.control <- function(control, fail = TRUE) -{ +check.semforest.control <- function(control, fail = TRUE) { attr <- attributes(control)$names def.attr <- attributes(semforest.control())$names - - if ((length(intersect(attr, def.attr)) != length(attr))) - { + + if ((length(intersect(attr, def.attr)) != length(attr))) { unknown <- setdiff(attr, def.attr) msg <- paste("Control object contains unknown parameters:", unknown) @@ -61,12 +61,10 @@ check.semforest.control <- function(control, fail = TRUE) stop() } else { ui_warn(msg) - + return(FALSE) - } } else { - return (TRUE) - + return(TRUE) } -} \ No newline at end of file +} diff --git a/R/checkModel.R b/R/checkModel.R index bd2ded8..2c11479 100644 --- a/R/checkModel.R +++ b/R/checkModel.R @@ -24,6 +24,3 @@ checkModel <- function(model, control) return(TRUE); } - -#inherits(model1,"lavaan") -#model1@Fit@converged diff --git a/R/coef_ctsem.R b/R/coef_ctsem.R index ac5d1ae..06c35a8 100644 --- a/R/coef_ctsem.R +++ b/R/coef_ctsem.R @@ -1,31 +1,31 @@ -# Quick and dirty function to get untramsformed parameter estimates from a +# Quick and dirty function to get untransformed parameter estimates from a # ctsemFit object. This probably does not work for all tips of CTSEMs. -coef.ctsemFit <- function(x) { +coef.ctsemFit <- function(object, ...) { - res <- x$mxobj$output$estimate + res <- object$mxobj$output$estimate - if (any(c(x$mxobj$MANIFESTVARbase$free))) { - values <- x$mxobj$MANIFESTVAR$result[x$mxobj$MANIFESTVARbase$free] - labels <- x$mxobj$MANIFESTVARbase$labels[!is.na(x$mxobj$MANIFESTVARbase$labels)] + if (any(c(object$mxobj$MANIFESTVARbase$free))) { + values <- object$mxobj$MANIFESTVAR$result[object$mxobj$MANIFESTVARbase$free] + labels <- object$mxobj$MANIFESTVARbase$labels[!is.na(object$mxobj$MANIFESTVARbase$labels)] res[labels] <- values } - if (any(c(x$mxobj$DIFFUSIONbase$free))) { - values <- x$mxobj$DIFFUSION$result[x$mxobj$DIFFUSIONbase$free] - labels <- x$mxobj$DIFFUSIONbase$labels[!is.na(x$mxobj$DIFFUSIONbase$labels)] + if (any(c(object$mxobj$DIFFUSIONbase$free))) { + values <- object$mxobj$DIFFUSION$result[object$mxobj$DIFFUSIONbase$free] + labels <- object$mxobj$DIFFUSIONbase$labels[!is.na(object$mxobj$DIFFUSIONbase$labels)] res[labels] <- values } - if (any(c(x$mxobj$T0VARbase$free))) { - values <- x$mxobj$T0VAR$result[x$mxobj$T0VARbase$free] - labels <- x$mxobj$T0VARbase$labels[!is.na(x$mxobj$T0VARbase$labels)] + if (any(c(object$mxobj$T0VARbase$free))) { + values <- object$mxobj$T0VAR$result[object$mxobj$T0VARbase$free] + labels <- object$mxobj$T0VARbase$labels[!is.na(object$mxobj$T0VARbase$labels)] res[labels] <- values } - if (any(c(x$mxobj$TRAITVARbase$free))) { - values <- x$mxobj$TRAITVAR$result[x$mxobj$TRAITVARbase$free] - labels <- x$mxobj$TRAITVARbase$labels[!is.na(x$mxobj$TRAITVARbase$labels)] + if (any(c(object$mxobj$TRAITVARbase$free))) { + values <- object$mxobj$TRAITVAR$result[object$mxobj$TRAITVARbase$free] + labels <- object$mxobj$TRAITVARbase$labels[!is.na(object$mxobj$TRAITVARbase$labels)] res[labels] <- values } diff --git a/R/computePval_maxLR.R b/R/computePval_maxLR.R index 1b6a114..c3e0438 100644 --- a/R/computePval_maxLR.R +++ b/R/computePval_maxLR.R @@ -1,18 +1,18 @@ #' Wrapper function for computing the maxLR corrected p value #' from strucchange -#' +#' #' @param maxLR maximum of the LR test statistics #' @param q number of free SEM parameters / degrees of freedom #' @param covariate covariate under evaluation. This is important to get the level of #' measurement from the covariate and the bin size for ordinal and #' categorical covariates. #' @param from numeric from interval (0, 1) specifying start of trimmed -#' sample period. With the default +#' sample period. With the default #' from = 0.15 the first and last 15 percent of observations are #' trimmed. This is only needed for continuous covariates. #' @param to numeric from interval (0, 1) specifying end of trimmed #' sample period. By default, to is 1. -#' @param nrep numeric. Number of replications used for simulating from the asymptotic +#' @param nrep numeric. Number of replications used for simulating from the asymptotic #' distribution (passed to efpFunctional). Only needed for ordinal #' covariates. #' @@ -20,23 +20,22 @@ #' @author Manuel Arnold #' @return Numeric. p value for maximally selected LR statistic - computePval_maxLR <- function(maxLR, q, covariate, from, to, nrep) { - - # Level of measurement - if (!is.factor(covariate)) { # metric - pval <- strucchange::supLM(from = from, to = to)$computePval(x = maxLR, nproc = q) - } else { - covariate <- sort(covariate) # sort covariate - covariate <- droplevels(covariate) - if (is.ordered(covariate)) { # ordinal - pval <- strucchange::ordL2BB(freq = covariate, nproc = q, - nrep = nrep)$computePval(x = maxLR, nproc = q) - } else { # categorical - pval <- strucchange::catL2BB(covariate)$computePval(x = maxLR, nproc = q) - } +computePval_maxLR <- function(maxLR, q, covariate, from, to, nrep) { + # Level of measurement + if (!is.factor(covariate)) { # metric + pval <- strucchange::supLM(from = from, to = to)$computePval(x = maxLR, nproc = q) + } else { + covariate <- sort(covariate) # sort covariate + covariate <- droplevels(covariate) + if (is.ordered(covariate)) { # ordinal + pval <- strucchange::ordL2BB( + freq = covariate, nproc = q, + nrep = nrep + )$computePval(x = maxLR, nproc = q) + } else { # categorical + pval <- strucchange::catL2BB(covariate)$computePval(x = maxLR, nproc = q) } - - pval - } - + + pval +} diff --git a/R/conditional.R b/R/conditional.R index bd69e42..08da777 100644 --- a/R/conditional.R +++ b/R/conditional.R @@ -1,74 +1,78 @@ # collect all cut-points collect.rules <- function(x, exclude) { - if (x$caption=="TERMINAL") { + if (x$caption == "TERMINAL") { return(NULL) } - + if (!x$rule$variable %in% exclude) { - c(list(x$rule), - collect.rules(x$left_child, exclude), - collect.rules(x$right_child, exclude)) + c( + list(x$rule), + collect.rules(x$left_child, exclude), + collect.rules(x$right_child, exclude) + ) } else { - c(collect.rules(x$left_child, exclude), - collect.rules(x$right_child, exclude)) + c( + collect.rules(x$left_child, exclude), + collect.rules(x$right_child, exclude) + ) } } -conditionalSample <- function(tree, dataset, id) -{ -permutation.idx <- id -rules <- collect.rules(tree, exclude=id) +conditionalSample <- function(tree, dataset, id) { + permutation.idx <- id + rules <- collect.rules(tree, exclude = id) -num.rules <- length(rules) + num.rules <- length(rules) -# if there are too many rules, sample from them -if (num.rules > 10) { - rules <- sample(x = rules,size=10,replace = FALSE) -} + # if there are too many rules, sample from them + if (num.rules > 10) { + rules <- sample(x = rules, size = 10, replace = FALSE) + } -# obtain logicals from rules -logicalS <- list() -for (i in 1:length(rules)) { -rule <- rules[[i]] -if (rule$relation=="%in%") - logicalS[[i]] <- dataset[,rule$variable] %in% rule$value -else if (rule$relation==">=") - logicalS[[i]] <- dataset[,rule$variable] >= rule$value -else - stop("Not Implemented!") -} + # obtain logicals from rules + logicalS <- list() + for (i in 1:length(rules)) { + rule <- rules[[i]] + if (rule$relation == "%in%") { + logicalS[[i]] <- dataset[, rule$variable] %in% rule$value + } else if (rule$relation == ">=") { + logicalS[[i]] <- dataset[, rule$variable] >= rule$value + } else { + stop("Not Implemented!") + } + } -if (length(logicalS)<=0) { - stop("ERROR in generating rules for resampling!") -} + if (length(logicalS) <= 0) { + stop("ERROR in generating rules for resampling!") + } -initialS <- rep(TRUE, length(logicalS[[1]])) + initialS <- rep(TRUE, length(logicalS[[1]])) -# generate all combinations and roll! -allcombs <- 2^length(rules) -for (i in 1:allcombs) { - bit.filter <- as.logical(intToBits(i))[1:num.rules] + # generate all combinations and roll! + allcombs <- 2^length(rules) + for (i in 1:allcombs) { + bit.filter <- as.logical(intToBits(i))[1:num.rules] - - tempS <- initialS - for (j in 1:length(bit.filter)) { - if (bit.filter[j]) - tempS <- tempS & logicalS[[j]] - else - tempS <- tempS & !logicalS[[j]] - } - # permute only temp - rowids <- 1:nrow(dataset) - selected.rowids <- rowids[tempS] - - if (length(selected.rowids) > 0) - { - dataset[selected.rowids,permutation.idx] <- dataset[sample(selected.rowids),permutation.idx] + + tempS <- initialS + for (j in 1:length(bit.filter)) { + if (bit.filter[j]) { + tempS <- tempS & logicalS[[j]] + } else { + tempS <- tempS & !logicalS[[j]] + } + } + # permute only temp + rowids <- 1:nrow(dataset) + selected.rowids <- rowids[tempS] + + if (length(selected.rowids) > 0) { + dataset[selected.rowids, permutation.idx] <- dataset[sample(selected.rowids), permutation.idx] + } + + + cat(bit.filter, " : ", length(selected.rowids), "\n") } - - - cat(bit.filter," : ", length(selected.rowids),"\n") -} -return(dataset) -} \ No newline at end of file + return(dataset) +} diff --git a/R/evaluateTree.R b/R/evaluateTree.R index 181489d..13e685c 100644 --- a/R/evaluateTree.R +++ b/R/evaluateTree.R @@ -1,11 +1,11 @@ #' Evaluate Tree -2LL -#' +#' #' A helper function to evaluate the negative two log-likelihood (-2LL) of leaf (terminal) nodes for a #' dataset. When given a \code{\link{semtree}} and a unique dataset, the model #' estimates -2LL for the tree parameters and data subsets that fit the tree #' branching criteria. -#' -#' +#' +#' #' @param tree A fitted \code{\link{semtree}} object #' @param test_set Dataset to fit to a fitted \code{\link{semtree}} object #' @param data_type type of data ("raw", "cov", "cor") @@ -26,32 +26,29 @@ evaluateTree <- function(tree, test_set, data_type = "raw", - leaf_ids = NULL) - { + leaf_ids = NULL) { # get a mapping of dataset rows to leaf ids if (is.null(leaf_ids)) { leaf_ids <- traverse(tree, test_set) } - + # for each leaf, calculate deviance of each data row dev <- 0 for (leaf_id in unique(leaf_ids)) { - temp_set <- test_set[leaf_ids == leaf_id,] - - + temp_set <- test_set[leaf_ids == leaf_id, ] + + leaf <- getNodeById(tree, leaf_id) - + # add up log-likelihoods dev <- dev + evaluateDataLikelihood(leaf$model, temp_set[, , drop = F], data_type) } - + result <- list() result$deviance <- dev result$num_models <- length(unique(leaf_ids)) - + return(result) - - } diff --git a/R/evaluateTreeFocus.R b/R/evaluateTreeFocus.R index 9160d87..23a15bd 100644 --- a/R/evaluateTreeFocus.R +++ b/R/evaluateTreeFocus.R @@ -4,60 +4,59 @@ # computes the difference in log-likelihoods considering focus parameters # evaluateTreeFocus <- - function(tree, test_set, data_type="raw", leaf_ids=NULL) - { - + function(tree, test_set, data_type = "raw", leaf_ids = NULL) { # get a mapping of dataset rows to leaf ids if (is.null(leaf_ids)) { leaf_ids <- traverse(tree, test_set) } - + # for each leaf, calculate deviance of each data row dev <- 0 for (leaf_id in unique(leaf_ids)) { # get all data rows from current leaf - temp_set <- test_set[leaf_ids==leaf_id, ]; - + temp_set <- test_set[leaf_ids == leaf_id, ] + # get the leaf object - leaf <- getNodeById( tree, leaf_id) - + leaf <- getNodeById(tree, leaf_id) + # test if node has a focus model if (is.null(leaf$focus.model)) { ui_warn("No focus model available!") return(NA) } - + # evaluate log-likelihood from baseline and focus model - #baseline = evaluateDataLikelihood(leaf$model, temp_set[,,drop=F], data_type ) - ll.focus = evaluateDataLikelihood(leaf$focus.model, - temp_set[,,drop=F], data_type ) - + # baseline = evaluateDataLikelihood(leaf$model, temp_set[,,drop=F], data_type ) + ll.focus <- evaluateDataLikelihood( + leaf$focus.model, + temp_set[, , drop = F], data_type + ) + # evaluate log-likelihood after permutation - - + + # add up log-likelihoods dev <- dev + ll.focus } - + result <- list() result$deviance <- dev result$num_models <- length(unique(leaf_ids)) - - return(result); - + + return(result) } # TODO: finish this block # TODO: remove earlier computation of baseline ll # compute influence of focus parameter before permutation -#ll.baseline <- eval.fun(tree, oob.data)$deviance -#ll.baseline <- fitSubmodels(tree$model, subset1, subset2, +# ll.baseline <- eval.fun(tree, oob.data)$deviance +# ll.baseline <- fitSubmodels(tree$model, subset1, subset2, # control, invariance=constraints$focus.parameters) # compute misfit of focus parameter after permutation -#ll.permuted <- eval.fun(tree, oob.data.permuted)$deviance -#ll.diff <- -ll.baseline + ll.permuted -#ui_warn("Unfinished implementation!") +# ll.permuted <- eval.fun(tree, oob.data.permuted)$deviance +# ll.diff <- -ll.baseline + ll.permuted +# ui_warn("Unfinished implementation!") -#} \ No newline at end of file +# } diff --git a/R/fairSplit.R b/R/fairSplit.R index 181445d..12333ca 100644 --- a/R/fairSplit.R +++ b/R/fairSplit.R @@ -6,43 +6,43 @@ fairSplit <- meta = NULL, constraints = NULL, ...) { - - # add a column of uniform random numbers to the data + # add a column of uniform random numbers to the data # to split into two halfes # TODO: replace with sample() of row ids n <- nrow(mydata) random <- runif(n, 0, 1) mydata <- cbind(mydata, random) - cross1 <- subset (mydata, as.numeric(mydata[, ncol(mydata)]) > 0.50) + cross1 <- subset(mydata, as.numeric(mydata[, ncol(mydata)]) > 0.50) cross2 <- - subset (mydata, as.numeric(mydata[, ncol(mydata)]) <= 0.50) + subset(mydata, as.numeric(mydata[, ncol(mydata)]) <= 0.50) cross1 <- cross1[-ncol(cross1)] cross2 <- cross2[-ncol(cross2)] mydata <- mydata[-ncol(mydata)] - + LL.btn <- c() split.btn <- c() cov.btn.names <- c() cov.btn.cols <- c() cov.type <- c() - - #Baseline model to compare subgroup fits to + + # Baseline model to compare subgroup fits to modelnew <- mxAddNewModelData(model, cross1, name = "BASE MODEL C1") LL.overall <- safeRunAndEvaluate(modelnew) - suppressWarnings(if (is.na(LL.overall)) - return(NULL)) + suppressWarnings(if (is.na(LL.overall)) { + return(NULL) + }) n.comp <- 0 - + if (control$report.level > 2) { report("Phase I - Select within variables", 1) } - + # - + # Step I - use cross validation fold 1 to evaluate all splits and # select best split # - + # iterate over all variables for (cur_col in meta$covariate.ids) { LL.baseline <- LL.overall @@ -53,51 +53,54 @@ fairSplit <- if (control$report.level > 10) { report(paste("Estimating baseline likelihood: ", LL.baseline), 2) } - - + + if (control$report.level >= 1 || control$verbose) { - ui_message(paste( - "Testing predictor", - colnames(cross1)[cur_col], - " (#", - cur_col, - "/", - ncol(cross1), - ")" - ), - 2) + ui_message( + paste( + "Testing predictor", + colnames(cross1)[cur_col], + " (#", + cur_col, + "/", + ncol(cross1), + ")" + ), + 2 + ) } - + LL.within <- base::c() within.split <- base::c() - - #case for factored covariates############################## + + # case for factored covariates############################## if (is.factor(cross1[, cur_col])) { - #unordered factors##################################### + # unordered factors##################################### if (!is.ordered(cross1[, cur_col])) { - var.type = .SCALE_CATEGORICAL - + var.type <- .SCALE_CATEGORICAL + v <- cross1[, cur_col] val.sets <- sort(unique(v)) - - + + if (length(val.sets) > 1) { - #create binaries for comparison of all combinations + # create binaries for comparison of all combinations result <- - recodeAllSubsets(cross1[, cur_col], colnames(cross1)[cur_col], use.levels = - T) - test1 <- rep(0, length(cross1[, cur_col]))#base::c() + recodeAllSubsets(cross1[, cur_col], colnames(cross1)[cur_col], + use.levels = + T + ) + test1 <- rep(0, length(cross1[, cur_col])) # base::c() test2 <- rep(NA, length(cross1[, cur_col])) - + for (j in 1:ncol(result$columns)) { - #cat("RUN",j,"\n") + # cat("RUN",j,"\n") test1 <- rep(0, length(cross1[, cur_col])) for (i in 1:length(cross1[, cur_col])) { if (isTRUE(result$columns[i, j])) { test1[i] <- 1 - } - else { + } else { test1[i] <- 0 } } @@ -107,25 +110,25 @@ fairSplit <- test2 <- test2[, -1] for (i in 1:(result$num_sets)) { LL.temp <- base::c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL if (result$num_sets == 1) { - vec = test2 + vec <- test2 + } else { + vec <- test2[[i]] } - else { - vec = test2[[i]] - } - subset1 <- subset (cross1, as.numeric(vec) == 2) - subset2 <- subset (cross1, as.numeric(vec) == 1) - + subset1 <- subset(cross1, as.numeric(vec) == 2) + subset2 <- subset(cross1, as.numeric(vec) == 1) + # refit baseline model with focus parameters @TAGX # for each new potential split if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { LL.baseline <- fitSubmodels(model, - subset1, - subset2, - control, - invariance = constraints$focus.parameters) + subset1, + subset2, + control, + invariance = constraints$focus.parameters + ) if (control$report.level > 10) { report( paste( @@ -136,17 +139,17 @@ fairSplit <- ) } } - - #catch LLR for each comparison, only if valid + + # catch LLR for each comparison, only if valid LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) - + if (nrow(subset1) + nrow(subset2) != nrow(cross1)) { message("INCONSISTENCY ERROR DETECTED") - + LL.return <- NA } - + if (!is.na(LL.return)) { LL.within <- cbind(LL.within, (LL.baseline - LL.return)) within.split <- cbind(within.split, i) @@ -154,34 +157,35 @@ fairSplit <- } } } - - - #ordered factors######################################### + + + # ordered factors######################################### if (is.ordered(cross1[, cur_col])) { - var.type = .SCALE_ORDINAL - + var.type <- .SCALE_ORDINAL + v <- cross1[, cur_col] val.sets <- sort(unique(v)) if (length(val.sets) > 1) { for (i in 2:(length(val.sets))) { LL.temp <- base::c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL cond1 <- - cross1[, cur_col] > val.sets[i-1] + cross1[, cur_col] > val.sets[i - 1] cond2 <- - cross1[, cur_col] <= val.sets[i-1] - - subset1 <- subset (cross1, cond1) - subset2 <- subset (cross1, cond2) - + cross1[, cur_col] <= val.sets[i - 1] + + subset1 <- subset(cross1, cond1) + subset2 <- subset(cross1, cond2) + # refit baseline model with focus parameters @TAGX if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { LL.baseline <- fitSubmodels(model, - subset1, - subset2, - control, - invariance = constraints$focus.parameters) + subset1, + subset2, + control, + invariance = constraints$focus.parameters + ) if (control$report.level > 10) { report( paste( @@ -192,46 +196,47 @@ fairSplit <- ) } } - - #catch LLR for each comparison + + # catch LLR for each comparison LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) if (!is.na(LL.return)) { LL.within <- cbind(LL.within, (LL.baseline - LL.return)) within.split <- - # cbind(within.split, (val.sets[i] + val.sets[(i - 1)]) / 2) - c(within.split, as.character(val.sets[i-1])) + # cbind(within.split, (val.sets[i] + val.sets[(i - 1)]) / 2) + c(within.split, as.character(val.sets[i - 1])) } } } } } - - #numeric (continuous) covariates################################ + + # numeric (continuous) covariates################################ if (is.numeric(cross1[, cur_col])) { - var.type = .SCALE_METRIC + var.type <- .SCALE_METRIC v <- as.numeric(cross1[, cur_col]) val.sets <- sort(unique(v)) - #if(length(val.sets) < 30|!isTRUE(control$shortcut)){ + # if(length(val.sets) < 30|!isTRUE(control$shortcut)){ if (length(val.sets) > 1) { for (i in 2:(length(val.sets))) { LL.temp <- base::c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL cond1 <- as.numeric(cross1[, cur_col]) > (val.sets[i] + val.sets[(i - 1)]) / 2 cond2 <- as.numeric(cross1[, cur_col]) < (val.sets[i] + val.sets[(i - 1)]) / 2 - subset1 <- subset (cross1, cond1) - subset2 <- subset (cross1, cond2) - + subset1 <- subset(cross1, cond1) + subset2 <- subset(cross1, cond2) + # refit baseline model with focus parameters @TAGX if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { LL.baseline <- fitSubmodels(model, - subset1, - subset2, - control, - invariance = constraints$focus.parameters) + subset1, + subset2, + control, + invariance = constraints$focus.parameters + ) if (control$report.level > 10) { report( paste( @@ -242,8 +247,8 @@ fairSplit <- ) } } - - #catch LLR for each comparison + + # catch LLR for each comparison LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) if (!is.na(LL.return)) { @@ -251,45 +256,49 @@ fairSplit <- within.split <- cbind(within.split, (val.sets[i] + val.sets[(i - 1)]) / 2) } else { - if (control$verbose) + if (control$verbose) { ui_fail("LL was NA when fitting submodels!") + } if (control$report.level > 2) { - report(paste("Could not estimate split at value ", val.sets[i]), - 2) + report( + paste("Could not estimate split at value ", val.sets[i]), + 2 + ) } } } } - #} + # } } - + if (control$report.level > 10) { if (!is.null(LL.within)) { report(paste("Within Likelihoods ", paste(round( LL.within, 2 ), collapse = " ")), 2) - } - else{ + } else { message("Within LLs NULL") } } - + if (control$report.level > 0) { if (is.null(LL.within)) { - report("No valid estimate found for any split value during the first round.", - 2) + report( + "No valid estimate found for any split value during the first round.", + 2 + ) } } - + max.LL.within <- base::c() max.within.split <- base::c() - - #store the LL, split value and variable number for each cov that makes a possible split + + # store the LL, split value and variable number for each cov that makes a possible split if (!is.null(LL.within)) { max.LL.within <- LL.within[1] max.within.split <- within.split[1] max.within.cov <- cur_col - + if (length(LL.within) > 1) { for (i in 2:length(LL.within)) { if (!is.na(LL.within[i]) | !is.null(LL.within[i])) { @@ -299,52 +308,51 @@ fairSplit <- } } } - } - + LL.btn <- cbind(LL.btn, max.LL.within) split.btn <- cbind(split.btn, max.within.split) - #cov.btn.names <- cbind(cov.btn.names, colnames(mydata[cur_col])) + # cov.btn.names <- cbind(cov.btn.names, colnames(mydata[cur_col])) cov.btn.cols <- cbind(cov.btn.cols, max.within.cov) cov.type <- cbind(cov.type, var.type) - + if (control$report.level >= 3) { - report(paste( - "Best split at ", - max.within.split, - " with LL", - max.LL.within - ), - 2) + report( + paste( + "Best split at ", + max.within.split, + " with LL", + max.LL.within + ), + 2 + ) } - } - } - + # # Phase II - select between variables using their best split # use cross validation fold 2 for evaluation # - #cat("PHASE II") + # cat("PHASE II") if (control$report.level > 2) { report("Phase II - Select between variables", 1) } - - #Baseline model to compare subgroup fits to + + # Baseline model to compare subgroup fits to modelnew <- mxAddNewModelData(model, cross2, name = "BASE MODEL C2") LL.overall <- safeRunAndEvaluate(modelnew) suppressWarnings(if (is.na(LL.overall)) { warning("Baseline likelihood is N/A; Aborting!") - + return(NULL) }) n.comp <- ncol(LL.btn) - + LL.max <- NULL - + if (!is.null(LL.btn)) { for (cur_col in 1:length(LL.btn)) { LL.temp <- base::c() @@ -356,12 +364,14 @@ fairSplit <- LL.baseline <- safeRunAndEvaluate(missingModel) num.rows <- dim(missingModel@data@observed)[1] } - + if (cov.type[cur_col] == .SCALE_CATEGORICAL) { if (!is.ordered(cross2[, cov.btn.cols[cur_col]])) { result <- - recodeAllSubsets(cross2[, (cov.btn.cols[cur_col])], colnames(cross2)[(cov.btn.cols[cur_col])], use.levels = - T) + recodeAllSubsets(cross2[, (cov.btn.cols[cur_col])], colnames(cross2)[(cov.btn.cols[cur_col])], + use.levels = + T + ) test1 <- base::c() clen <- dim(cross2)[1] test2 <- rep(NA, clen) @@ -370,8 +380,7 @@ fairSplit <- for (i in 1:clen) { if (isTRUE(result$columns[i, j])) { test1[i] <- 1 - } - else { + } else { test1[i] <- 0 } } @@ -379,23 +388,21 @@ fairSplit <- test2 <- data.frame(test2, test1) } test2 <- test2[, -1] - - + + if (result$num_sets == 1) { vec <- test2 - } - else { + } else { vec <- test2[[split.btn[cur_col]]] } - - - subset1 <- subset (cross2, as.numeric(vec) == 2) - subset2 <- subset (cross2, as.numeric(vec) == 1) + + + subset1 <- subset(cross2, as.numeric(vec) == 2) + subset2 <- subset(cross2, as.numeric(vec) == 1) } - } - else if (cov.type[cur_col] == .SCALE_ORDINAL) { + } else if (cov.type[cur_col] == .SCALE_ORDINAL) { cond1 <- cross2[, cov.btn.cols[cur_col]] > split.btn[cur_col] cond2 <- @@ -408,12 +415,12 @@ fairSplit <- cond2 <- cross2[, cov.btn.cols[cur_col]] < split.btn[cur_col] subset1 <- subset(cross2, cond1) - subset2 <- subset(cross2, cond2) + subset2 <- subset(cross2, cond2) } - + # refit baseline model with focus parameters @TAGX if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { result <- fitSubmodels( model, subset1, @@ -437,10 +444,10 @@ fairSplit <- nrow(subset1) + nrow(subset2) # THIS DEFINITELY IS A HACK. SHOULD BE # DEFINED BY THE RETURNED MODEL } else { - #warning("LL.sum is NA after reestimation with focus parameter") + # warning("LL.sum is NA after reestimation with focus parameter") ui_warn( "Could not estimate constrained/focus model for variable ", -# colnames(cross2)[cur_col], + # colnames(cross2)[cur_col], names(mydata[cov.btn.cols[cur_col]]), ". Possibly choosing a suboptimal split." ) @@ -448,16 +455,18 @@ fairSplit <- num.rows <- nrow(subset1) + nrow(subset2) } } - + # evaluate split LL.cur <- - LL.baseline - fitSubmodels(model, subset1, subset2, control, invariance = - NULL) - + LL.baseline - fitSubmodels(model, subset1, subset2, control, + invariance = + NULL + ) + # cross2 == subset of data for Phase II - + if (nrow(subset1) + nrow(subset2) != num.rows) { - #browser() + # browser() ui_fail( paste( "SERIOUS INCONSISTENCY ERROR. Numbers of rows do not match. Type=" @@ -472,11 +481,12 @@ fairSplit <- ) LL.cur <- NA } - - if (control$verbose) + + if (control$verbose) { ui_message(paste("Testing", cur_col, ":", names(mydata[cov.btn.cols[cur_col]]), " ", LL.cur, "\n")) - - + } + + if (cur_col == 1) { LL.max <- LL.cur LL.btn <- LL.cur @@ -484,8 +494,7 @@ fairSplit <- name.max <- names(mydata[cov.btn.cols[1]]) col.max <- cov.btn.cols[1] type.max <- cov.type[1] - } - else { + } else { LL.btn <- cbind(LL.btn, LL.cur) if (!is.na(LL.cur) & !is.null(LL.cur)) { if (is.na(LL.max) || is.na(LL.cur) || LL.max < LL.cur) { @@ -497,15 +506,12 @@ fairSplit <- } } } - + if (control$report.level > 2) { report(paste("Name", names(mydata[cov.btn.cols[cur_col]]), "LL:", LL.cur), 2) } - } - - } - else { + } else { return(NULL) } ## @@ -516,22 +522,22 @@ fairSplit <- if (control$report.level > 2) { report("Phase III - Select Split in Best Variable", 1) } - + if (!is.null(constraints$focus.parameters)) { stop("Method 'fair3' does not yet support focus parameters in tree.constraints.") } - + # run full data on model for overall model fit modelnew <- mxAddNewModelData(model, mydata, name = "BASE MODEL FULL") LL.overall <- safeRunAndEvaluate(modelnew) suppressWarnings(if (is.na(LL.overall)) { warning("Overall likelihood is N/A; Aborting!") - + return(NULL) }) n.comp <- 0 - + # iterate over all splits in col.max cur_col <- col.max LL.baseline <- LL.overall @@ -539,47 +545,51 @@ fairSplit <- if (!is.null(missingModel)) { LL.baseline <- safeRunAndEvaluate(missingModel) } - - if (control$verbose) - ui_message("Testing ", - cur_col, - "/", - ncol(mydata), - " (", - colnames(mydata)[cur_col], - ")") - + + if (control$verbose) { + ui_message( + "Testing ", + cur_col, + "/", + ncol(mydata), + " (", + colnames(mydata)[cur_col], + ")" + ) + } + if (control$report.level >= 1) { report(paste("Testing predictor", colnames(mydata)[cur_col]), 1) } - + LL.within <- base::c() within.split <- base::c() - - #case for factored covariates############################## + + # case for factored covariates############################## if (is.factor(mydata[, cur_col])) { - #unordered factors##################################### + # unordered factors##################################### if (!is.ordered(mydata[, cur_col])) { - var.type = 1 + var.type <- 1 v <- as.numeric(mydata[, cur_col]) val.sets <- sort(union(v, v)) - #cat("Length", length(val.sets),":",paste(v),"\n") + # cat("Length", length(val.sets),":",paste(v),"\n") if (length(val.sets) > 1) { - #create binaries for comparison of all combinations + # create binaries for comparison of all combinations result <- - recodeAllSubsets(mydata[, cur_col], colnames(mydata)[cur_col], use.levels = - T) - test1 <- rep(0, length(mydata[, cur_col]))#base::c() + recodeAllSubsets(mydata[, cur_col], colnames(mydata)[cur_col], + use.levels = + T + ) + test1 <- rep(0, length(mydata[, cur_col])) # base::c() test2 <- rep(NA, length(mydata[, cur_col])) - + for (j in 1:ncol(result$columns)) { - #cat("RUN",j,"\n") + # cat("RUN",j,"\n") test1 <- rep(0, length(mydata[, cur_col])) for (i in 1:length(mydata[, cur_col])) { if (isTRUE(result$columns[i, j])) { test1[i] <- 1 - } - else { + } else { test1[i] <- 0 } } @@ -589,26 +599,25 @@ fairSplit <- test2 <- test2[, -1] for (i in 1:(result$num_sets)) { LL.temp <- base::c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL if (result$num_sets == 1) { - vec = test2 - } - else { - vec = test2[[i]] + vec <- test2 + } else { + vec <- test2[[i]] } - subset1 <- subset (mydata, as.numeric(vec) == 2) - subset2 <- subset (mydata, as.numeric(vec) == 1) - - #catch LLR for each comparison, only if valid + subset1 <- subset(mydata, as.numeric(vec) == 2) + subset2 <- subset(mydata, as.numeric(vec) == 1) + + # catch LLR for each comparison, only if valid LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) - + if (nrow(subset1) + nrow(subset2) != nrow(mydata)) { message("INCONSISTENCY ERROR DETECTED") - + LL.return <- NA } - + if (!is.na(LL.return)) { LL.within <- cbind(LL.within, (LL.baseline - LL.return)) within.split <- cbind(within.split, i) @@ -616,26 +625,26 @@ fairSplit <- } } } - - - #ordered factors######################################### + + + # ordered factors######################################### if (is.ordered(mydata[, cur_col])) { - var.type = .SCALE_ORDINAL + var.type <- .SCALE_ORDINAL v <- as.numeric(as.character(mydata[, cur_col])) val.sets <- sort(union(v, v)) if (length(val.sets) > 1) { for (i in 2:(length(val.sets))) { LL.temp <- base::c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL cond1 <- as.numeric(as.character(mydata[, cur_col])) > (val.sets[i] + val.sets[(i - - 1)]) / 2 + 1)]) / 2 cond2 <- as.numeric(as.character(mydata[, cur_col])) < (val.sets[i] + val.sets[(i - - 1)]) / 2 - subset1 <- subset (mydata, cond1) - subset2 <- subset (mydata, cond2) - #catch LLR for each comparison + 1)]) / 2 + subset1 <- subset(mydata, cond1) + subset2 <- subset(mydata, cond2) + # catch LLR for each comparison LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) if (!is.na(LL.return)) { @@ -647,23 +656,23 @@ fairSplit <- } } } - - #numeric (continuous) covariates################################ + + # numeric (continuous) covariates################################ if (is.numeric(mydata[, cur_col])) { - var.type = 2 + var.type <- 2 v <- as.numeric(mydata[, cur_col]) val.sets <- sort(union(v, v)) if (length(val.sets) > 1) { for (i in 2:(length(val.sets))) { LL.temp <- base::c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL cond1 <- as.numeric(mydata[, cur_col]) > (val.sets[i] + val.sets[(i - 1)]) / 2 cond2 <- as.numeric(mydata[, cur_col]) < (val.sets[i] + val.sets[(i - 1)]) / 2 - subset1 <- subset (mydata, cond1) - subset2 <- subset (mydata, cond2) - #catch LLR for each comparison + subset1 <- subset(mydata, cond1) + subset2 <- subset(mydata, cond2) + # catch LLR for each comparison LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) if (!is.na(LL.return)) { @@ -671,42 +680,46 @@ fairSplit <- within.split <- cbind(within.split, (val.sets[i] + val.sets[(i - 1)]) / 2) } else { - if (control$verbose) + if (control$verbose) { ui_fail("LL was NA when fitting submodels!") + } if (control$report.level > 2) { - report(paste("Could not estimate split at value ", val.sets[i]), - 2) + report( + paste("Could not estimate split at value ", val.sets[i]), + 2 + ) } } } } } - + if (control$report > 10) { if (!is.null(LL.within)) { message("Within LLs ", paste(round(LL.within, 2), collapse = " ")) - } - else{ + } else { message("Within LLs NULL") } } - + if (control$report.level > 0) { if (is.null(LL.within)) { - report("No valid estimate found for any split value during the first round.", - 2) + report( + "No valid estimate found for any split value during the first round.", + 2 + ) } } - + max.LL.within <- base::c() max.within.split <- base::c() - - #store the LL, split value and variable number for each cov that makes a possible split + + # store the LL, split value and variable number for each cov that makes a possible split if (!is.null(LL.within)) { max.LL.within <- LL.within[1] max.within.split <- within.split[1] max.within.cov <- cur_col - + if (length(LL.within) > 1) { for (i in 2:length(LL.within)) { if (!is.na(LL.within[i]) | !is.null(LL.within[i])) { @@ -716,46 +729,49 @@ fairSplit <- } } } - } - #max.LL.within - #max.within.split + # max.LL.within + # max.within.split split.max <- max.within.split LL.max <- max.LL.within - + if (control$report.level >= 3) { - report(paste( - "Best split at ", - max.within.split, - " with LL", - max.LL.within - ), - 2) + report( + paste( + "Best split at ", + max.within.split, + " with LL", + max.LL.within + ), + 2 + ) } } } - + btn.matrix <- rbind(LL.btn, names(mydata[c(cov.btn.cols)]), cov.btn.cols, split.btn) colnames(btn.matrix) <- c(paste("var", seq(1, ncol(btn.matrix)), sep = "")) rownames(btn.matrix) <- c("LR", "variable", "column", "split val") - + if (control$report.level >= 2) { report("_____________________________", 1) - report(paste( - "Best predictor", - name.max, - "split at", - split.max, - "with LL", - LL.max - ), - 1) + report( + paste( + "Best predictor", + name.max, + "split at", + split.max, + "with LL", + LL.max + ), + 1 + ) report("", 1) } - - #cat("Best ",LL.max, " ",split.max," ",name.max," ",col.max,"\n") + + # cat("Best ",LL.max, " ",split.max," ",name.max," ",col.max,"\n") return( list( LL.max = LL.max, diff --git a/R/findDefinitionVariables.R b/R/findDefinitionVariables.R index 991a5f9..fde409f 100644 --- a/R/findDefinitionVariables.R +++ b/R/findDefinitionVariables.R @@ -1,6 +1,6 @@ findDefinitionVariables <- function(model) { def_vars <- c() - for (mid in 1:length(m@matrices)) { + for (mid in 1:length(model@matrices)) { mt <- model@matrices[[mid]] idx <- startsWith(mt$labels,"data.") if (any(idx,na.rm = TRUE)) { diff --git a/R/fitSubmodels.R b/R/fitSubmodels.R index 8fbc30d..aec7f81 100644 --- a/R/fitSubmodels.R +++ b/R/fitSubmodels.R @@ -237,10 +237,13 @@ fitSubmodels <- function(model, newlabels1 <- names(omxGetParameters(model1)) newlabels2 <- names(omxGetParameters(model2)) + # make safe names for variables (should probably be + # all non-alpha-numeric?) + # replace square brackets, dots, commas by underscores newlabels1 <- - stringr::str_replace_all(newlabels1, "\\[|\\]|,|\\.", "_") + gsub("\\[|\\]|\\.|,", "_", newlabels1) newlabels2 <- - stringr::str_replace_all(newlabels2, "\\[|\\]|,|\\.", "_") + gsub("\\[|\\]|\\.|,", "_", newlabels2) # replace labels eqids <- which(newlabels1 %in% invariance) @@ -299,7 +302,7 @@ fitSubmodels <- function(model, mxFitFunctionAlgebra('h12') ) - sharedRun <- mxRun(sharedModel, silent = T) + sharedRun <- mxRun(sharedModel, silent = TRUE) LL.sum <- mxEval(h12, sharedRun) diff --git a/R/gefp_semtree.R b/R/gefp_semtree.R index 802c3c7..feea5c4 100644 --- a/R/gefp_semtree.R +++ b/R/gefp_semtree.R @@ -1,6 +1,6 @@ -gefp_semtree <- function (..., fit = NULL, scores, vcov = NULL, - decorrelate = TRUE, sandwich = TRUE, order.by = NULL, - fitArgs = NULL, parm = NULL, data = list()) { +gefp_semtree <- function(..., fit = NULL, scores, vcov = NULL, + decorrelate = TRUE, sandwich = TRUE, order.by = NULL, + fitArgs = NULL, parm = NULL, data = list()) { vcov. <- vcov fm <- list(...) fm <- fm[[1]] @@ -8,47 +8,56 @@ gefp_semtree <- function (..., fit = NULL, scores, vcov = NULL, k <- NCOL(scores) z <- order.by order.name <- deparse(substitute(order.by)) - if (is.factor(z)) + if (is.factor(z)) { z <- as.numeric(z) + } scores <- as.matrix(scores) - if (inherits(z, "POSIXt")) - z <- suppressWarnings(c(z[1] + as.numeric(difftime(z[1], - z[2], units = "secs")), z)) - else z <- suppressWarnings(c(z[1] - as.numeric(diff(z[1:2])), - z)) - process <- scores/sqrt(n) + if (inherits(z, "POSIXt")) { + z <- suppressWarnings(c(z[1] + as.numeric(difftime(z[1], + z[2], + units = "secs" + )), z)) + } else { + z <- suppressWarnings(c( + z[1] - as.numeric(diff(z[1:2])), + z + )) + } + process <- scores / sqrt(n) if (is.null(vcov.)) { J <- crossprod(process) J12 <- strucchange::root.matrix(J) - } - else { + } else { if (sandwich) { - Q <- chol2inv(chol(bread(fm)/n)) - J <- (Q %*% vcov.(fm, order.by = order.by, data = data) %*% - Q)/n + Q <- chol2inv(chol(bread(fm) / n)) + J <- (Q %*% vcov.(fm, order.by = order.by, data = data) %*% + Q) / n J12 <- strucchange::root.matrix(J) - } - else { + } else { J12 <- vcov. } } process <- rbind(0, process) process <- apply(process, 2, cumsum) - if (decorrelate) + if (decorrelate) { process <- t(chol2inv(chol(J12)) %*% t(process)) - else { - process <- t(1/sqrt(diag(J)) * t(process)) - if (length(parm) > 1) + } else { + process <- t(1 / sqrt(diag(J)) * t(process)) + if (length(parm) > 1) { stop("limiting process is not a Brownian bridge") + } } colnames(process) <- colnames(scores) - if (!is.null(parm)) + if (!is.null(parm)) { process <- process[, parm] - retval <- list(process = suppressWarnings(zoo::zoo(process, z)), - nreg = k, nobs = n, call = match.call(), fit = fit, scores = scores, - fitted.model = fm, par = NULL, lim.process = "Brownian bridge", - type.name = "M-fluctuation test", order.name = order.name, - J12 = J12) + } + retval <- list( + process = suppressWarnings(zoo::zoo(process, z)), + nreg = k, nobs = n, call = match.call(), fit = fit, scores = scores, + fitted.model = fm, par = NULL, lim.process = "Brownian bridge", + type.name = "M-fluctuation test", order.name = order.name, + J12 = J12 + ) class(retval) <- "gefp" return(retval) } diff --git a/R/getLikelihood.R b/R/getLikelihood.R index ae17908..17afe9f 100644 --- a/R/getLikelihood.R +++ b/R/getLikelihood.R @@ -1,15 +1,13 @@ -getLikelihood<-function(model) -{ +getLikelihood <- function(model) { # this seems to be not generic enough # return(OpenMx::mxEval(objective, model)); - - msm <- getS3method("summary","MxModel") + + msm <- getS3method("summary", "MxModel") # alternative: if (is.null(model)) { warning("NULL Model in getLikelihood()-call") return(NULL) } - + return(msm(model)$Minus2LogLikelihood) - - } \ No newline at end of file +} diff --git a/R/getNumNodes.R b/R/getNumNodes.R index 212b4c0..06642cb 100644 --- a/R/getNumNodes.R +++ b/R/getNumNodes.R @@ -1,34 +1,31 @@ #' Tree Size -#' +#' #' Counts the number of nodes in a tree. -#' -#' +#' +#' #' @param tree A SEM tree object. #' @author Andreas M. Brandmaier, John J. Prindle #' @references Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, #' U. (2013). Structural equation model trees. \emph{Psychological Methods}, #' 18(1), 71-86. getNumNodes <- -function(tree) -{ - if ((is.null(tree$left_child)) && (is.null(tree$right_child))) - { - return(1); - } + function(tree) { + if ((is.null(tree$left_child)) && (is.null(tree$right_child))) { + return(1) + } - count <- 1 - - if (tree$left_child$caption != "TERMINAL") { - count <- count + getNumNodes(tree$left_child) - } else { - count <- count + 1 - } - if (tree$right_child$caption != "TERMINAL") { - count <- count + getNumNodes(tree$right_child) - } else { - count <- count + 1 - } - - return(count) - -} + count <- 1 + + if (tree$left_child$caption != "TERMINAL") { + count <- count + getNumNodes(tree$left_child) + } else { + count <- count + 1 + } + if (tree$right_child$caption != "TERMINAL") { + count <- count + getNumNodes(tree$right_child) + } else { + count <- count + 1 + } + + return(count) + } diff --git a/R/getParDiffForest.R b/R/getParDiffForest.R index 5785a98..85d2f1a 100644 --- a/R/getParDiffForest.R +++ b/R/getParDiffForest.R @@ -3,7 +3,7 @@ #' differences between post-split nodes. #' @param forest a semforest object. #' @param measure a character. "wald" (default) gives the squared parameter -#' differences devided by their pooled standard errors. test" gives the +#' differences divided by their pooled standard errors. test" gives the #' contributions of the parameters to the test statistic. "raw" gives the #' absolute values of the parameter differences. #' @param normalize logical value; if TRUE parameter differences of each split diff --git a/R/getParDiffTree.R b/R/getParDiffTree.R index f9b2791..264df55 100644 --- a/R/getParDiffTree.R +++ b/R/getParDiffTree.R @@ -3,7 +3,7 @@ #' differences between post-split nodes. #' @param tree a semtree object. #' @param measure a character. "wald" (default) gives the squared parameter -#' differences devided by their pooled standard errors. "test" gives the +#' differences divided by their pooled standard errors. "test" gives the #' contributions of the parameters to the test statistic."raw" gives the #' absolute values of the parameter differences. #' @param normalize logical value; if TRUE parameter differences of each split diff --git a/R/getPredictorsLavaan.R b/R/getPredictorsLavaan.R index 06ce472..f6c6288 100644 --- a/R/getPredictorsLavaan.R +++ b/R/getPredictorsLavaan.R @@ -7,7 +7,7 @@ getPredictorsLavaan <- function(model, dataset, covariates) model.ids[i] <- which(model@Data@ov.names[[1]][i] == names(dataset)); } all.ids <- 1:length(names(dataset)) - cvid <- sets::as.set(all.ids)-sets::as.set(model.ids) + cvid <- all.ids[!all.ids %in% model.ids] if (length(cvid)==0) { ui_stop("No covariates contained in dataset!") } @@ -18,7 +18,7 @@ getPredictorsLavaan <- function(model, dataset, covariates) all.ids <- 1:length(names(dataset)) covariate.ids <- sapply(covariates, function(cv) { which(cv==names(dataset))} ) - modid <- sets::as.set(all.ids)-sets::as.set(covariate.ids) + modid <- all.ids[!all.ids %in% covariate.ids] if (length(modid)==0) { ui_stop("No covariates available to split on!") } diff --git a/R/getPredictorsOpenMx.R b/R/getPredictorsOpenMx.R index addb34d..3df10e5 100644 --- a/R/getPredictorsOpenMx.R +++ b/R/getPredictorsOpenMx.R @@ -12,7 +12,7 @@ getPredictorsOpenMx <- function(mxmodel, dataset, covariates) model.ids[i] <- which(cmp); } all.ids <- 1:length(names(dataset)) - cvid <- sets::as.set(all.ids)-sets::as.set(model.ids) + cvid <- all.ids[!all.ids %in% model.ids] if (length(cvid)==0) { ui_stop("Error. No predictors contained in dataset!") } @@ -23,7 +23,7 @@ getPredictorsOpenMx <- function(mxmodel, dataset, covariates) all.ids <- 1:length(names(dataset)) covariate.ids <- sapply(covariates, function(cv) { which(cv==names(dataset))} ) - modid <- sets::as.set(all.ids)-sets::as.set(covariate.ids) + modid <- all.ids[!all.ids %in% covariate.ids] if (length(modid)==0) { ui_stop("No covariates contained in dataset!") } diff --git a/R/growTree.R b/R/growTree.R index e6fb930..630426b 100644 --- a/R/growTree.R +++ b/R/growTree.R @@ -11,30 +11,28 @@ # # # -growTree <- function(model=NULL, mydata=NULL, - control=NULL, invariance=NULL, meta=NULL, - edgelabel=NULL, depth=0, constraints=NULL, ...) -{ - - if(is.null(mydata)) { +growTree <- function(model = NULL, mydata = NULL, + control = NULL, invariance = NULL, meta = NULL, + edgelabel = NULL, depth = 0, constraints = NULL, ...) { + if (is.null(mydata)) { stop("There was no data for growing the tree") } - + if (is.null(meta)) { - warning("SEM tree could not determine model variables and covariates in the dataset."); - return(NULL); + warning("SEM tree could not determine model variables and covariates in the dataset.") + return(NULL) } - + if (control$verbose) { - ui_message("Growing level ",depth," n = ",nrow(mydata)); + ui_message("Growing level ", depth, " n = ", nrow(mydata)) } - - if (control$report.level>0) { - report(paste("Growing tree level",depth), 0) + + if (control$report.level > 0) { + report(paste("Growing tree level", depth), 0) } - - - + + + # Node null settings in testing for significant splitting node <- list() node$left_child <- NULL @@ -42,147 +40,195 @@ growTree <- function(model=NULL, mydata=NULL, node$caption <- "TERMINAL" node$N <- dim(mydata)[1] class(node) <- "semtree" - + # -- sample columns in case of SEM forest usage -- fulldata <- mydata fullmeta <- meta if (control$mtry > 0) { - # get names of model variables before sampling model.names <- names(mydata)[meta$model.ids] covariate.names <- names(mydata)[meta$covariate.ids] - #perform sampling + # perform sampling mydata <- sampleColumns(mydata, names(mydata)[meta$covariate.ids], control$mtry) # get new model ids after sampling by name - meta$model.ids <- sapply(model.names, function(x) {which(x==names(mydata))}) + meta$model.ids <- sapply(model.names, function(x) { + which(x == names(mydata)) + }) names(meta$model.ids) <- NULL - meta$covariate.ids <- unlist(lapply(covariate.names, function(x) {which(x==names(mydata))})) - + meta$covariate.ids <- unlist(lapply(covariate.names, function(x) { + which(x == names(mydata)) + })) + node$colnames <- colnames(mydata) if (control$verbose) { - ui_message("Subsampled predictors: ",paste(node$colnames[meta$covariate.ids])) + ui_message("Subsampled predictors: ", paste(node$colnames[meta$covariate.ids])) } } - # determine whether split evaluation can be done on p values - node$p.values.valid <- control$method != "cv" + # override forced split? + arguments <- list(...) + if ("forced_splits" %in% names(arguments) && !is.null(arguments$forced_splits)) { + forced_splits <- arguments$forced_splits + + # get names of model variables before forcing + model.names <- names(mydata)[meta$model.ids] + covariate.names <- names(mydata)[meta$covariate.ids] + + # select subset with model variables and single, forced predictor + forcedsplit.name <- forced_splits[1] + + if (control$verbose) { + cat("FORCED split: ",forcedsplit.name,"\n") + } + + + mydata <- fulldata[, c(model.names, forcedsplit.name) ] + node$colnames <- colnames(mydata) + + # get new model ids after sampling by name + meta$model.ids <- sapply(model.names, function(x) { + which(x == names(mydata)) + }) + names(meta$model.ids) <- NULL + meta$covariate.ids <- unlist(lapply(covariate.names, function(x) { + which(x == names(mydata)) + })) + + } else { + forced_splits <- NULL + } + # determine whether split evaluation can be done on p values + node$p.values.valid <- control$method != "cv" + # set some default values for the node object node$lr <- NA node$edge_label <- edgelabel - + # Estimate model on current data set ## 31.08.2022: Model in root is not refitted if refit is set to FALSE if (depth == 0 & !control$refit & length(constraints) == 0) { # do not fit model node$model <- model } else { # fit model # OpenMx - if(control$sem.prog == 'OpenMx'){ - full.model <- mxAddNewModelData(model = model, data = mydata, - name = "BASE MODEL") + if (control$sem.prog == "OpenMx") { + full.model <- mxAddNewModelData( + model = model, data = mydata, + name = "BASE MODEL" + ) node$model <- try( - suppressMessages(OpenMx::mxTryHard(model = full.model, paste = FALSE, - silent = TRUE)), - silent = TRUE) + suppressMessages(OpenMx::mxTryHard( + model = full.model, paste = FALSE, + silent = TRUE, bestInitsOutput = FALSE + )), + silent = TRUE + ) } # lavaan - if(control$sem.prog == 'lavaan'){ + if (control$sem.prog == "lavaan") { ## 11.08.2022 Note: fits lavaan model on mydata node$model <- try(suppressWarnings(eval( - parse(text=paste("lavaan::",model@Options$model.type, - '(lavaan::parTable(model),data=mydata,missing=\'', - model@Options$missing,'\')',sep="")))),silent=T) + parse(text = paste("lavaan::", model@Options$model.type, + "(lavaan::parTable(model),data=mydata,missing='", + model@Options$missing, "')", + sep = "" + )) + )), silent = T) } ## 26.06.2022: Added code for ctsem models - if(control$sem.prog == 'ctsem'){ + if (control$sem.prog == "ctsem") { full.model <- suppressMessages(try( - ctsemOMX::ctFit(dat = mydata[, -meta$covariate.ids], - ctmodelobj = model$ctmodelobj, - dataform = "wide", - objective = model$ctfitargs$objective, - stationary = model$ctfitargs$stationary, - optimizer = model$ctfitargs$optimizer, - retryattempts = ifelse(model$ctfitargs$retryattempts >= 20, - yes = model$ctfitargs$retryattempts, - no = 20), - carefulFit = model$ctfitargs$carefulFit, - showInits = model$ctfitargs$showInits, - asymptotes = model$ctfitargs$asymptotes, - meanIntervals = model$ctfitargs$meanIntervals, - discreteTime = model$ctfitargs$discreteTime, - verbose = model$ctfitargs$verbose, - transformedParams = model$ctfitargs$transformedParams) + ctsemOMX::ctFit( + dat = mydata[, -meta$covariate.ids], + ctmodelobj = model$ctmodelobj, + dataform = "wide", + objective = model$ctfitargs$objective, + stationary = model$ctfitargs$stationary, + optimizer = model$ctfitargs$optimizer, + retryattempts = ifelse(model$ctfitargs$retryattempts >= 20, + yes = model$ctfitargs$retryattempts, + no = 20 + ), + carefulFit = model$ctfitargs$carefulFit, + showInits = model$ctfitargs$showInits, + asymptotes = model$ctfitargs$asymptotes, + meanIntervals = model$ctfitargs$meanIntervals, + discreteTime = model$ctfitargs$discreteTime, + verbose = model$ctfitargs$verbose, + transformedParams = model$ctfitargs$transformedParams + ) )) full.model$mxobj@name <- "BASE MODEL" node$model <- full.model } } - - - if (is(node$model,"try-error")) - { + + + if (is(node$model, "try-error")) { ui_fail("Model had a run error.") - node$term.reason <- node$model[[1]] - node$model <- NULL; - return(node); + node$term.reason <- node$model[[1]] + node$model <- NULL + return(node) } - + if (is.null(node$model)) { - node$term.reason <- "Model was NULL! Model could not be estimated."; - return(node); + node$term.reason <- "Model was NULL! Model could not be estimated." + return(node) } - - + + ########################################################### ### OPENMX USED HERE ### ########################################################### - if(control$sem.prog == 'OpenMx'){ - + if (control$sem.prog == "OpenMx") { # some export/namespace problem here with the generic # getS3method("summary","MxModel") gets me the right fun - S3summary <- getS3method("summary","MxModel") - + S3summary <- getS3method("summary", "MxModel") + # list of point estimates, std.dev, and names of all freely estimated parameters - node$params <- S3summary(node$model)$parameters[,5]; - names(node$params) <- S3summary(node$model)$parameters[,1]; - node$params_sd <- S3summary(node$model)$parameters[,6]; - node$param_names <- S3summary(node$model)$parameters[,1]; + node$params <- S3summary(node$model)$parameters[, 5] + names(node$params) <- S3summary(node$model)$parameters[, 1] + node$params_sd <- S3summary(node$model)$parameters[, 6] + node$param_names <- S3summary(node$model)$parameters[, 1] } ########################################################### ### lavaan USED HERE ### ########################################################### - if(control$sem.prog == 'lavaan'){ - - node$params <- lavaan::coef(node$model) # put parameters into node + if (control$sem.prog == "lavaan") { + node$params <- lavaan::coef(node$model) # put parameters into node names(node$params) <- names(lavaan::coef(node$model)) # parameter names are stored as well - - #read in estimated parameters (take only those that have non-NA z values) - #parameters <- data.frame( + + # read in estimated parameters (take only those that have non-NA z values) + # parameters <- data.frame( # lavaan::parameterEstimates(node$model))[!is.na( # data.frame(lavaan::parameterEstimates(node$model))[,"z"]),] - + parameters <- lavaan::parameterEstimates(node$model) - + # if any labels are missing (some labels provided), then put default labels in the label col. - for(i in 1:nrow(parameters)){ - if(!is.null(parameters$label)){ - if(parameters$label[i]==""){parameters$label[i]<-paste(parameters$lhs[i],parameters$op[i],parameters$rhs[i],sep="")} + for (i in 1:nrow(parameters)) { + if (!is.null(parameters$label)) { + if (parameters$label[i] == "") { + parameters$label[i] <- paste(parameters$lhs[i], parameters$op[i], parameters$rhs[i], sep = "") + } } - } + } # if all labels are missing make a label column - if(is.null(parameters$label)){ - label <- paste(parameters$lhs,parameters$op,parameters$rhs,sep="") - parameters<- cbind(parameters,label) - } - + if (is.null(parameters$label)) { + label <- paste(parameters$lhs, parameters$op, parameters$rhs, sep = "") + parameters <- cbind(parameters, label) + } + # store the SE of the estimates - se <- rep(NA,length(unique(parameters$se))) - for(i in 1:length(unique(parameters$label))){ - for(j in 1:nrow(parameters)){ - if(unique(parameters$label)[i]==parameters$label[j]){se[i]<-parameters$se[j]} + se <- rep(NA, length(unique(parameters$se))) + for (i in 1:length(unique(parameters$label))) { + for (j in 1:nrow(parameters)) { + if (unique(parameters$label)[i] == parameters$label[j]) { + se[i] <- parameters$se[j] + } } } - + # list of point estimates, std.dev, and names of all freely estimated parameters node$params_sd <- se node$param_names <- names(lavaan::coef(node$model)) @@ -190,8 +236,7 @@ growTree <- function(model=NULL, mydata=NULL, ########################################################### ### ctsemOMX USED HERE ### ########################################################### - if(control$sem.prog == 'ctsem'){ - + if (control$sem.prog == "ctsem") { # list of point estimates, std.dev, and names of all freely estimated parameters if (control$ctsem_sd) { # slower ctsem_summary <- summary(node$model) # this is very slow @@ -206,9 +251,9 @@ growTree <- function(model=NULL, mydata=NULL, node$param_names <- names(ctsem_coef) } } - + # df - + if (!is.null(constraints$focus.parameters)) { # df's are equal to number of focus parameters node$df <- length(constraints$focus.parameters) @@ -217,160 +262,177 @@ growTree <- function(model=NULL, mydata=NULL, # df == num. parameters node$df <- length(node$param_names) } - - - + + + # add unique node id via global variable node$node_id <- getGlobal("global.node.id") - setGlobal("global.node.id", node$node_id+1) - + setGlobal("global.node.id", node$node_id + 1) + # determine whether we should skip splitting # 1. minimum cases in node if (!is.na(control$min.N)) { if (node$N <= control$min.N) { - if(control$verbose){ + if (control$verbose) { ui_message("Minimum user defined N for leaf node.") } - node$term.reason <- "Minimum number of cases in leaf node" - return(node); + node$term.reason <- "Minimum number of cases in leaf node" + return(node) } } # 2. maximum depth for tree reached - if (!is.na(control$max.depth)){ + if (!is.na(control$max.depth)) { if (depth >= control$max.depth) { - if(control$verbose){ + if (control$verbose) { ui_message("Max user defined tree depth reached.") } - node$term.reason <- "Maximum depth reached in leaf node" - return(node); + node$term.reason <- "Maximum depth reached in leaf node" + return(node) } } - + # determine best split based in chosen method (ml or score) and (naive, cv, fair) result <- NULL # 1. unbalanced selection method if (control$method == "naive") { - # if (control$test.type=="ml") { - + result <- tryCatch( ################################################ - naiveSplit(node$model, mydata, control, invariance, meta, constraints=constraints, ...) + naiveSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...) ################################################ , - error = function(e) { cat(paste("Error occured!",e,sep="\n")); traceback(); return(NULL); } - ); - - } else if (control$method=="score") { - + error = function(e) { + cat(paste("Error occured!", e, sep = "\n")) + traceback() + return(NULL) + } + ) + } else if (control$method == "score") { result <- tryCatch( ################################################ - result <- ScoreSplit(node$model, mydata, control, invariance, meta, constraints=constraints, ...) + result <- ScoreSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...) ################################################ , - error = function(e) { cat(paste("Error occured!",e,sep="\n")); traceback(); return(NULL); } - ); - - } + error = function(e) { + cat(paste("Error occured!", e, sep = "\n")) + traceback() + return(NULL) + } + ) + } # 2a. split half data to determine best split then use hold out set to compare one split per covariate else if (control$method == "fair") { control$fair3Step <- FALSE result <- tryCatch( ################################################ - fairSplit(node$model, mydata, control, invariance, meta, constraints=constraints, ...) + fairSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...) ################################################ , - error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); } - ); + error = function(e) { + cat(paste("Error occured!", e, sep = "\n")) + return(NULL) + } + ) } # 2b. split half data to determine best split then use hold out set to compare one split per covariate, with step 3 all splits retest else if (control$method == "fair3") { control$fair3Step <- TRUE result <- tryCatch( - ################################################ - fairSplit(node$model, mydata, control, invariance, meta, constraints=constraints, ...) + ################################################ + fairSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...) ################################################ , - error = function(e) { cat(paste("Error occured!",e,sep="\n")); return(NULL); } - ); - } - # 3. Traditional cross validation for covariate split selection - else if (control$method == "cv") { - stop("This split selection procedure is not supported anymore. Please see the new score-based tests for split selection.") + error = function(e) { + cat(paste("Error occured!", e, sep = "\n")) + return(NULL) + } + ) } else { ui_fail("Error. Unknown split method selected") stop() } - + # return values in result are: # LL.max : numeric, log likelihood ratio of best split - # split.max : numeric, value to split best column on + # split.max : numeric, split point; value to split best column on + # (for metric variables) # col.max : index of best column - # cov.name : name of best candidate - + # name.max : name of best candidate + # type.max : + # btn.matrix : a matrix, which contains test statistics and + # more information for + # the various split points evaluated + # n.comp : the (implicit) number of multiple tests evaluated for + # determining the best split + # store the value of the selected test statistic node$lr <- NA if (!is.null(result)) { node$lr <- result$LL.max node$result <- result } - + # if no split found, exit node without continuing growth if (is.null(result) || is.null(result$LL.max)) { if (control$verbose) { ui_message("Best Likelihood Ratio was NULL. Stop splitting") } - return(node); + return(node) } - + # provide verbose output to the user about best split if (control$verbose) { - ui_ok("Best split is ",result$name.max," with statistic = ",round(node$lr,2)); + ui_ok("Best split is ", result$name.max, " with statistic = ", round(node$lr, 2)) } - + # compute p value from chi2 if (!is.null(result$p.max)) { node$p <- result$p.max } else { - node$p <- pchisq(node$lr,df=node$df, lower.tail=F) - + node$p <- pchisq(node$lr, df = node$df, lower.tail = F) + if (control$use.maxlm) { - # Borders for continuous covariates if (!is.factor(mydata[, result$name.max])) { - props <- cumsum(table(mydata[, result$name.max])) / node$N split_val_lhs <- as.numeric(names(which(props >= control$strucchange.from)[1])) split_val_rhs <- as.numeric(names(which(props >= control$strucchange.to)[1])) - + btn_matrix_max <- result$btn.matrix[, result$btn.matrix["variable", ] == - result$name.max, drop = FALSE] - + result$name.max, drop = FALSE] + num_split_val <- as.numeric(btn_matrix_max["split val", ]) - + n1 <- which(num_split_val <= split_val_lhs) n1 <- n1[length(n1)] n2 <- which(num_split_val >= split_val_rhs)[1] - - if (length(n1) == 0) {n1 <- 1} - if (is.na(n2)) {n2 <- length(num_split_val)} - + + if (length(n1) == 0) { + n1 <- 1 + } + if (is.na(n2)) { + n2 <- length(num_split_val) + } + LR <- as.numeric(btn_matrix_max["LR", n1:n2]) - + max_pos <- which.max(LR) + n1 - 1 node$result$LL.max <- node$lr <- as.numeric(btn_matrix_max["LR", max_pos]) node$result$split.max <- as.numeric(btn_matrix_max["split val", max_pos]) } - - node$p <- computePval_maxLR(maxLR = node$lr, q = node$df, - covariate = mydata[,result$col.max], from = control$strucchange.from, - to = control$strucchange.to, nrep = control$strucchange.nrep) + + node$p <- computePval_maxLR( + maxLR = node$lr, q = node$df, + covariate = mydata[, result$col.max], from = control$strucchange.from, + to = control$strucchange.to, nrep = control$strucchange.nrep + ) } } - - + + # --------- determine whether to continue splitting -------------- - if (is(control$custom.stopping.rule,"function")) { + if (is(control$custom.stopping.rule, "function")) { stopping.rule <- control$custom.stopping.rule } else { stopping.rule <- stoppingRuleDefault @@ -378,169 +440,197 @@ growTree <- function(model=NULL, mydata=NULL, # stoppingRuleDefault() is a function that gets inputs node, result, control # this function can be replaced by a user-specified function srule <- stopping.rule(node, result, control) - + # determine whether splitting should be continued depending on return state # of the function - if (is(srule,"list")) { + if (is(srule, "list")) { node <- srule$node cont.split <- !(srule$stop.rule) } else { - cont.split <- !srule + cont.split <- !srule node$p.values.valid <- FALSE } - + # restore mydata here if (mtry was > 0) -- for semforests if (control$mtry > 0) { - # also need to remap col.max to original data! if (!is.null(result$col.max) && !is.na(result$col.max)) { col.max.name <- names(mydata)[result$col.max] - result$col.max <- which(names(fulldata)==col.max.name) + result$col.max <- which(names(fulldata) == col.max.name) } else { col.max.name <- NULL } - + # restore data mydata <- fulldata meta <- fullmeta } - if ((!is.null(cont.split)) && (!is.na(cont.split)) && (cont.split)) { - if (control$report.level > 10) { - report("Stop splitting based on stopping rule.", 1) + # restore mydata if forced split was true + # and (potentially) force continuation of splitting + if (!is.null(forced_splits)) { + + + # also need to remap col.max to original data! + if (!is.null(result$col.max) && !is.na(result$col.max)) { + col.max.name <- names(mydata)[result$col.max] + result$col.max <- which(names(fulldata) == col.max.name) + } else { + col.max.name <- NULL } + mydata <- fulldata + meta <- fullmeta + # pop first element + forced_splits <- forced_splits[-1] + # set to NULL if no splits left + if (length(forced_splits)==0) forced_splits <- NULL + # force continuation of splitting ? + cont.split <- TRUE + } + + if ((!is.null(cont.split)) && (!is.na(cont.split)) && (cont.split)) { + if (control$report.level > 10) { + report("Stop splitting based on stopping rule.", 1) + } + + + # store the split name (covariate name and split value) RHS is yes branch - if(result$type.max==.SCALE_CATEGORICAL) { + if (result$type.max == .SCALE_CATEGORICAL) { # unordered factor collating and splitting lvl <- (control$method == "fair") - result1 <- recodeAllSubsets(mydata[,result$col.max],colnames(mydata)[result$col.max], - growbool=T, use.levels=lvl) - - + result1 <- recodeAllSubsets(mydata[, result$col.max], colnames(mydata)[result$col.max], + growbool = T, use.levels = lvl + ) + + test2 <- rep(NA, nrow(mydata)) - if(!is.na(result1$num_sets) & !is.null(result1$num_sets)){ - for(j in 1:result1$num_sets) { + if (!is.na(result1$num_sets) & !is.null(result1$num_sets)) { + for (j in 1:result1$num_sets) { test1 <- rep(NA, nrow(mydata)) - for(i in 1:nrow(mydata)) { - if(isTRUE(result1$columns[i,j])) {test1[i] <- 1} - else if(!is.na(result1$columns[i,j])){test1[i] <- 0} - else{test1[i]<-NA} + for (i in 1:nrow(mydata)) { + if (isTRUE(result1$columns[i, j])) { + test1[i] <- 1 + } else if (!is.na(result1$columns[i, j])) { + test1[i] <- 0 + } else { + test1[i] <- NA + } } test1 <- as.factor(test1) test2 <- data.frame(test2, test1) } } - test2 <- test2[,-1] - - # if var.type==1, then split.max corresponds to the index of + test2 <- test2[, -1] + + # if level is categorical, then split.max corresponds to the index of # the best column in the matrix that represents all subsets # make sure that this is not casted to a string if there # are predictors of other types (esp., factors) # browser() - result$split.max <- as.integer(result$split.max) - - #named <- colnames(result1$columns)[result$split.max] - # node$caption <- paste(colnames(result1$columns)[result$split.max]) - best_subset_col_id = result$split.max - best_values = result1$expressions[ (best_subset_col_id-1)*3 +1]$value - - node$rule = list(variable=result$col.max, relation="%in%", - value=best_values, - name = result$name.max) - node$caption <- paste(result$name.max, " in [", paste0(best_values, - collapse=" ")," ]") - - if(result1$num_sets==1) { - sub1 <- subset (mydata, as.numeric(test2) == 2) - sub2 <- subset (mydata, as.numeric(test2) == 1) + if (!all(is.na(result$btn.matrix))) { + result$split.max <- as.integer(result$split.max) + + # named <- colnames(result1$columns)[result$split.max] + # node$caption <- paste(colnames(result1$columns)[result$split.max]) + best_subset_col_id <- result$split.max + best_values <- result1$expressions[(best_subset_col_id - 1) * 3 + 1]$value + } else { + best_values <- result$split.max } - else { - sub1 <- subset (mydata, as.numeric(test2[[result$split.max]]) == 2) - sub2 <- subset (mydata, as.numeric(test2[[result$split.max]]) == 1) + node$rule <- list( + variable = result$col.max, relation = "%in%", + value = best_values, + name = result$name.max + ) + node$caption <- paste(result$name.max, " in [", paste0(best_values, + collapse = " " + ), " ]") + + if (result1$num_sets == 1) { + sub1 <- subset(mydata, as.numeric(test2) == 2) + sub2 <- subset(mydata, as.numeric(test2) == 1) + } else { + sub1 <- subset(mydata, as.numeric(test2[[result$split.max]]) == 2) + sub2 <- subset(mydata, as.numeric(test2[[result$split.max]]) == 1) } - - } - else if (result$type.max==.SCALE_METRIC){ - + } else if (result$type.max == .SCALE_METRIC) { # if var.type==2, then split.max corresponds to the split point value # make sure that this is not casted to a string if there # are predictors of other types (esp., factors) result$split.max <- as.numeric(result$split.max) - + # ordered factor splitting of data - node$caption <- paste(result$name.max,">=", signif(result$split.max,6),sep=" ") - node$rule = list(variable=result$col.max, relation=">=", value=c(result$split.max), name = result$name.max) - sub1 <- subset( mydata, as.numeric(as.character(mydata[, (result$col.max)])) >result$split.max) - sub2 <- subset( mydata, as.numeric(as.character(mydata[, (result$col.max)]))<=result$split.max) - } - else if (result$type.max==.SCALE_ORDINAL) { - - node$caption <- paste(result$name.max,">", result$split.max,sep=" ") - node$rule = list(variable=result$col.max, relation=">", value=c(result$split.max), name = result$name.max) - sub1 <- subset( mydata, mydata[, (result$col.max)] >result$split.max) - sub2 <- subset( mydata, mydata[, (result$col.max)]<=result$split.max) - - } - else if (result$type.max == 99) { + node$caption <- paste(result$name.max, ">=", signif(result$split.max, 6), sep = " ") + node$rule <- list(variable = result$col.max, relation = ">=", value = c(result$split.max), name = result$name.max) + sub1 <- subset(mydata, as.numeric(as.character(mydata[, (result$col.max)])) > result$split.max) + sub2 <- subset(mydata, as.numeric(as.character(mydata[, (result$col.max)])) <= result$split.max) + } else if (result$type.max == .SCALE_ORDINAL) { + node$caption <- paste(result$name.max, ">", result$split.max, sep = " ") + node$rule <- list(variable = result$col.max, relation = ">", value = c(result$split.max), name = result$name.max) + sub1 <- subset(mydata, mydata[, (result$col.max)] > result$split.max) + sub2 <- subset(mydata, mydata[, (result$col.max)] <= result$split.max) + } else if (result$type.max == 99) { # this is an error code by score test implementation # return node and stop splitting # TODO (MA): Do we need to issue a warning? return(node) - } - else { + } else { # TODO: remove this bc this condition should be captured earlier in any case # continuous variables splitting - #node$caption <- paste(result$name.max,">=", signif(result$split.max,3),sep=" ") - #node$rule = list(variable=result$col.max, relation=">=", value=c(result$split.max), name = result$name.max) - #sub1 <- subset( mydata, as.numeric(mydata[, (result$col.max)]) >result$split.max) - #sub2 <- subset( mydata, as.numeric(mydata[, (result$col.max)])<=result$split.max) + # node$caption <- paste(result$name.max,">=", signif(result$split.max,3),sep=" ") + # node$rule = list(variable=result$col.max, relation=">=", value=c(result$split.max), name = result$name.max) + # sub1 <- subset( mydata, as.numeric(mydata[, (result$col.max)]) >result$split.max) + # sub2 <- subset( mydata, as.numeric(mydata[, (result$col.max)])<=result$split.max) stop("An error occured!") } - + flush.console() - + ########################################################## ## NEW CODE TO INCLUDE CASES MISSING ON SPLITTING VARIABLE class(node) <- "semtree" - if(control$use.all& (nrow(mydata)>(nrow(sub1)+nrow(sub2)))){ - if(control$verbose){message("Missing on splitting variable: ",result$name.max)} + if (control$use.all & (nrow(mydata) > (nrow(sub1) + nrow(sub2)))) { + if (control$verbose) { + message("Missing on splitting variable: ", result$name.max) + } completeSplits <- calculateBestSubsets(model, mydata, sub1, sub2, result) sub1 <- completeSplits$sub1 sub2 <- completeSplits$sub2 } ########################################################## - + # build a model for missing data if (control$missing == "ctree") { ui_warn("Missing data treatment with ctree is not yet implemented.") - #temp = mydata[!is.na(mydata[,result$name.max]),] - #node$missing.model = party::ctree( + # temp = mydata[!is.na(mydata[,result$name.max]),] + # node$missing.model = party::ctree( # data = temp, # formula = as.formula(paste0(result$name.max,"~."))) } else if (control$missing == "rpart") { - temp = mydata[!is.na(mydata[,result$name.max]),] - node$missing.model = rpart::rpart( + temp <- mydata[!is.na(mydata[, result$name.max]), ] + node$missing.model <- rpart::rpart( data = temp, - formula = as.formula(paste0(result$name.max,"~."))) + formula = as.formula(paste0(result$name.max, "~.")) + ) } - - + + # recursively continue splitting # result1 - RHS; result2 - LHS - result2 <- growTree( node$model, sub2, control, invariance, meta, edgelabel=0, depth=depth+1, constraints) - result1 <- growTree( node$model, sub1, control, invariance, meta, edgelabel=1, depth=depth+1, constraints) - + result2 <- growTree(node$model, sub2, control, invariance, meta, edgelabel = 0, depth = depth + 1, constraints, forced_splits = forced_splits) + result1 <- growTree(node$model, sub1, control, invariance, meta, edgelabel = 1, depth = depth + 1, constraints, forced_splits = forced_splits) + # store results in recursive list structure node$left_child <- result2 node$right_child <- result1 - - return(node); - + + return(node) } else { # if cont.split is F or NA or NULL then return node without further splitting - return(node); - } -} + return(node) + } +} diff --git a/R/mergeForests.R b/R/mergeForests.R index 68edb61..223f0ce 100644 --- a/R/mergeForests.R +++ b/R/mergeForests.R @@ -4,6 +4,7 @@ #' #' #' @aliases merge.semforest +#' #' @param x A SEM Forest #' @param y A second SEM Forest #' @param \dots Extra arguments. Currently unused. @@ -12,14 +13,15 @@ #' @references Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, #' U. (2013). Structural equation model trees. \emph{Psychological Methods}, #' 18(1), 71-86. +#' #' @exportS3Method merge semforest merge.semforest <- function(x, y, ...) { - return(merge.internal(list(x,y))) + return(merge_internal(list(x,y))) } -merge.internal <- function(forest.list){ +merge_internal <- function(forest.list){ # determine number of forests to merge num.forests <- length(forest.list) @@ -33,7 +35,7 @@ merge.internal <- function(forest.list){ if (getModelType(m1) != getModelType(m2)) stop("Incompatible models") if (getModelType(m1)=="OpenMx") { # for OpenMx models, we compare whether a selected set of - # attributes instead of the entire object because eg. + # attributes are identical instead of the entire object because eg. # the output-attribute may differ on time stamps or # the compute-attribute may differ for the optimizer used or # the number of iterations @@ -41,14 +43,14 @@ merge.internal <- function(forest.list){ for (at in list("matrices","algebras","constraints","latentVars","manifestVars", "data","data means","data type","submodels","expectation","fitfunction", "independent")) { - c1_temp <- digest::digest(attr(m1,at))==digest::digest(attr(m2,at)) + c1_temp <- identical(attr(m1,at), attr(m2,at)) if (!c1_temp) { ui_warn("Models differ on attribute '",at,"'.") } c1 <- c1 & c1_temp } } else if (getModelType(m1)=="lavaan") { - c1 <- digest::digest(m1)==digest::digest(m2) + c1 <- identical(m1, m2) } else { - c1 <- digest::digest(m1)==digest::digest(m2) + c1 <- identical(m1, m2) } # some checks @@ -56,7 +58,7 @@ merge.internal <- function(forest.list){ tmp1$num.trees <- NA tmp2 <- forest.list[[i]]$control tmp2$num.trees <- NA - c2 <- digest::digest(tmp1)==digest::digest(tmp2) + c2 <- identical(tmp1, tmp2) if (!c1) { stop("Cannot merge forests! Models differ."); } diff --git a/R/naiveSplit.R b/R/naiveSplit.R index 3ae75f3..4046d76 100644 --- a/R/naiveSplit.R +++ b/R/naiveSplit.R @@ -15,44 +15,45 @@ naiveSplit <- cov.type <- c() cov.col <- c() cov.name <- c() - + LL.within <- c() within.split <- c() - #Baseline model to compare subgroup fits to + # Baseline model to compare subgroup fits to ########################################################### ### OPENMX USED HERE ### ########################################################### - if (control$sem.prog == 'OpenMx') { + if (control$sem.prog == "OpenMx") { modelnew <- mxAddNewModelData(model, mydata, name = "BASE MODEL") LL.overall <- safeRunAndEvaluate(modelnew) - suppressWarnings(if (is.na(LL.overall)) - return(NULL)) + suppressWarnings(if (is.na(LL.overall)) { + return(NULL) + }) } ########################################################### ### lavaan USED HERE ### ########################################################### - if (control$sem.prog == 'lavaan') { - #if (control$verbose) {message("Assessing overall model")} + if (control$sem.prog == "lavaan") { + # if (control$verbose) {message("Assessing overall model")} modelnew <- eval(parse( text = paste( - "lavaan::",model@Options$model.type, - '(lavaan::parTable(model),data=mydata,missing=\'', + "lavaan::", model@Options$model.type, + "(lavaan::parTable(model),data=mydata,missing='", model@Options$missing, - '\',do.fit=F)', + "',do.fit=F)", sep = "" ) )) - #modelnew <- lavaan(parTable(model),data=mydata,model.type=model@Options$model.type,do.fit=FALSE) + # modelnew <- lavaan(parTable(model),data=mydata,model.type=model@Options$model.type,do.fit=FALSE) LL.overall <- safeRunAndEvaluate(modelnew) - suppressWarnings(if (is.na(LL.overall)) - return(NULL)) + suppressWarnings(if (is.na(LL.overall)) { + return(NULL) + }) } if (pp) { comparedData <- max(meta$model.ids + 1) - } - else { + } else { comparedData <- meta$covariate.ids } for (cur_col in comparedData) { @@ -66,37 +67,37 @@ naiveSplit <- if (control$report.level > 10) { report(paste("Estimating baseline likelihood: ", LL.baseline), 1) } - - + + # tell the user a little bit about where we are right now if (control$verbose) { - ui_message("Testing Predictor: ", - colnames(mydata)[cur_col]) + ui_message( + "Testing Predictor: ", + colnames(mydata)[cur_col] + ) } ############################################################ - #case for factored covariates############################## + # case for factored covariates############################## if (is.factor(mydata[, cur_col])) { - #unordered factors##################################### + # unordered factors##################################### if (!is.ordered(mydata[, cur_col])) { - var.type = .SCALE_CATEGORICAL - + var.type <- .SCALE_CATEGORICAL + val.sets <- unique(mydata[, cur_col]) if (length(val.sets) > 1) { - #create binaries for comparison of all combinations + # create binaries for comparison of all combinations result <- recodeAllSubsets(mydata[, cur_col], colnames(mydata)[cur_col]) test1 <- c() test2 <- rep(NA, length(mydata[, cur_col])) - + for (j in 1:ncol(result$columns)) { for (i in 1:length(mydata[, cur_col])) { if (isTRUE(result$columns[i, j])) { test1[i] <- 1 - } - else if (!is.na(result$columns[i, j])) { + } else if (!is.na(result$columns[i, j])) { test1[i] <- 0 - } - else{ + } else { test1[i] <- NA } } @@ -104,27 +105,27 @@ naiveSplit <- test2 <- data.frame(test2, test1) } test2 <- test2[, -1] - + for (i in 1:(result$num_sets)) { LL.temp <- c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL if (result$num_sets == 1) { - subset1 <- subset (mydata, as.numeric(test2) == 2) - subset2 <- subset (mydata, as.numeric(test2) == 1) - } - else { - subset1 <- subset (mydata, as.numeric(test2[[i]]) == 2) - subset2 <- subset (mydata, as.numeric(test2[[i]]) == 1) + subset1 <- subset(mydata, as.numeric(test2) == 2) + subset2 <- subset(mydata, as.numeric(test2) == 1) + } else { + subset1 <- subset(mydata, as.numeric(test2[[i]]) == 2) + subset2 <- subset(mydata, as.numeric(test2[[i]]) == 1) } - + # refit baseline model with focus parameters @TAGX if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { LL.baseline <- fitSubmodels(model, - subset1, - subset2, - control, - invariance = constraints$focus.parameters) + subset1, + subset2, + control, + invariance = constraints$focus.parameters + ) if (control$report.level > 10) { report( paste( @@ -135,7 +136,7 @@ naiveSplit <- ) } } - #browser() + # browser() LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) if (!is.na(LL.return)) { @@ -149,32 +150,33 @@ naiveSplit <- } } } - #ordered factors######################################### + # ordered factors######################################### if (is.ordered(mydata[, cur_col])) { - var.type = .SCALE_ORDINAL - + var.type <- .SCALE_ORDINAL + val.sets <- sort(unique(mydata[, cur_col])) - + if (length(val.sets) > 1) { for (i in 2:(length(val.sets))) { LL.temp <- c() - #subset data for chosen value and store LL - #cond1 <- as.numeric(as.character(mydata[,cur_col])) > (val.sets[i]+val.sets[(i-1)])/2 - #cond2 <- as.numeric(as.character(mydata[,cur_col])) < (val.sets[i]+val.sets[(i-1)])/2 - + # subset data for chosen value and store LL + # cond1 <- as.numeric(as.character(mydata[,cur_col])) > (val.sets[i]+val.sets[(i-1)])/2 + # cond2 <- as.numeric(as.character(mydata[,cur_col])) < (val.sets[i]+val.sets[(i-1)])/2 + cond1 <- mydata[, cur_col] > val.sets[i - 1] cond2 <- mydata[, cur_col] <= val.sets[i - 1] - subset1 <- subset (mydata, cond1) - subset2 <- subset (mydata, cond2) - + subset1 <- subset(mydata, cond1) + subset2 <- subset(mydata, cond2) + # refit baseline model with focus parameters @TAGX if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { LL.baseline <- fitSubmodels(model, - subset1, - subset2, - control, - invariance = constraints$focus.parameters) + subset1, + subset2, + control, + invariance = constraints$focus.parameters + ) if (control$report.level > 10) { report( paste( @@ -185,12 +187,12 @@ naiveSplit <- ) } } - + LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) if (!is.na(LL.return)) { LL.within <- cbind(LL.within, (LL.baseline - LL.return)) - #within.split <- cbind(within.split, (val.sets[i]+val.sets[(i-1)])/2) + # within.split <- cbind(within.split, (val.sets[i]+val.sets[(i-1)])/2) within.split <- cbind(within.split, as.character(val.sets[i - 1])) cov.col <- cbind(cov.col, cur_col) @@ -203,33 +205,34 @@ naiveSplit <- } } - #numeric (continuous) covariates################################ + # numeric (continuous) covariates################################ if (is.numeric(mydata[, cur_col])) { - var.type = .SCALE_METRIC + var.type <- .SCALE_METRIC v <- as.numeric(mydata[, cur_col]) val.sets <- sort(unique(v)) - + if (length(val.sets) > 1) { for (i in 2:(length(val.sets))) { LL.temp <- c() - #subset data for chosen value and store LL + # subset data for chosen value and store LL cond1 <- as.numeric(mydata[, cur_col]) > (val.sets[i] + val.sets[(i - 1)]) / 2 cond2 <- as.numeric(mydata[, cur_col]) < (val.sets[i] + val.sets[(i - 1)]) / 2 - subset1 <- subset (mydata, cond1) - subset2 <- subset (mydata, cond2) - - #catch LLR for each comparison - + subset1 <- subset(mydata, cond1) + subset2 <- subset(mydata, cond2) + + # catch LLR for each comparison + # refit baseline model with focus parameters @TAGX if (!is.null(constraints) & - (!is.null(constraints$focus.parameters))) { + (!is.null(constraints$focus.parameters))) { LL.baseline <- fitSubmodels(model, - subset1, - subset2, - control, - invariance = constraints$focus.parameters) + subset1, + subset2, + control, + invariance = constraints$focus.parameters + ) if (control$report.level > 10) { report( paste( @@ -240,7 +243,7 @@ naiveSplit <- ) } } - + LL.return <- fitSubmodels(model, subset1, subset2, control, invariance = NULL) # cat("LLreturn:",LL.return," and value split at:",(val.sets[i]+val.sets[(i-1)])/2,"\n") @@ -253,23 +256,21 @@ naiveSplit <- cov.type <- cbind(cov.type, var.type) n.comp <- n.comp + 1 } - } } - } } - - + + if (is.null(LL.within)) { return(NULL) } - + btn.matrix <- rbind(LL.within, cov.name, cov.col, within.split) colnames(btn.matrix) <- c(paste("var", seq(1, ncol(btn.matrix)), sep = "")) rownames(btn.matrix) <- c("LR", "variable", "column", "split val") - + filter <- c() if (!is.null(invariance)) { if (control$verbose) { @@ -278,9 +279,9 @@ naiveSplit <- filter <- invarianceFilter(model, mydata, btn.matrix, LL.baseline, invariance, control) } - + # find best - + LL.max <- NA split.max <- NA name.max <- NA @@ -295,8 +296,7 @@ naiveSplit <- name.max <- cov.name[cur_col] col.max <- cov.col[cur_col] type.max <- cov.type[cur_col] - } - else if (LL.within[cur_col] > LL.max) { + } else if (LL.within[cur_col] > LL.max) { LL.max <- LL.within[cur_col] split.max <- within.split[cur_col] name.max <- cov.name[cur_col] @@ -304,8 +304,7 @@ naiveSplit <- type.max <- cov.type[cur_col] } } - } - else { + } else { if (!is.na(LL.within[cur_col])) { if (is.na(LL.max)) { LL.max <- LL.within[cur_col] @@ -313,8 +312,7 @@ naiveSplit <- name.max <- cov.name[cur_col] col.max <- cov.col[cur_col] type.max <- cov.type[cur_col] - } - else if (LL.within[cur_col] > LL.max) { + } else if (LL.within[cur_col] > LL.max) { LL.max <- LL.within[cur_col] split.max <- within.split[cur_col] name.max <- cov.name[cur_col] @@ -324,26 +322,24 @@ naiveSplit <- } } } - - if (control$verbose & control$report.level==99) { - - cat("LL.within:",paste0(LL.within,collapse=","),"\n") - cat("LL.max: ", paste0(LL.max,collapse=","),"\n") - cat("within.split: ",paste0(within.split,collapse=","),"\n" ) - cat("split max",split.max,"\n") + + if (control$verbose & control$report.level == 99) { + cat("LL.within:", paste0(LL.within, collapse = ","), "\n") + cat("LL.max: ", paste0(LL.max, collapse = ","), "\n") + cat("within.split: ", paste0(within.split, collapse = ","), "\n") + cat("split max", split.max, "\n") } - + # alternative way of counting the number of comparisons # count the number of variables instead of tests if (control$naive.bonferroni.type == 1) { n.comp <- length(comparedData) } - - + + if (is.na(LL.max)) { return(NULL) - } - else + } else { ( return( list( @@ -358,4 +354,5 @@ naiveSplit <- ) ) ) - } \ No newline at end of file + } + } diff --git a/R/nodeFunSemtree.R b/R/nodeFunSemtree.R index f83794f..2c35753 100644 --- a/R/nodeFunSemtree.R +++ b/R/nodeFunSemtree.R @@ -1,7 +1,6 @@ - -nodeFunSemtree<-function(x, labs, digits, varlen) -{ - paste(ifelse(x$frame$var=="",x$frame$estimates, x$frame$yval), "\n\n", - paste0("N=", x$frame$n), x$frame$crit) +nodeFunSemtree <- function(x, labs, digits, varlen) { + paste( + ifelse(x$frame$var == "", x$frame$estimates, x$frame$yval), "\n\n", + paste0("N=", x$frame$n), x$frame$crit + ) } - diff --git a/R/npar.R b/R/npar.R new file mode 100644 index 0000000..959ace5 --- /dev/null +++ b/R/npar.R @@ -0,0 +1,12 @@ +npar <- function(x) { + if (is(x, "OpenMx") || is(x, "MxRAMModel")) { + length(OpenMx::omxGetParameters(x)) + } else if (is(x, "ctsemFit")) { + stop("npar() not implemented yet for ctsemFit") + } else if (is(x, "lavaan")) { + pt <- lavaan::parameterTable(x) + sum(pt$free != 0) + } else { + stop("npar() not implemented yet") + } +} diff --git a/R/parameters.R b/R/parameters.R index f4e0cdf..80af166 100644 --- a/R/parameters.R +++ b/R/parameters.R @@ -1,12 +1,12 @@ #' SEMtrees Parameter Estimates Table -#' +#' #' Returns a table of parameters with columns corresponding to freely estimated #' parameters and rows corresponding to nodes in the tree. -#' +#' #' The row names of the resulting data frame correspond to internal node ids #' and the column names correspond to parameters in the SEM. Standard errors of #' the estimates can be obtained from \code{\link{parameters}}. -#' +#' #' @param tree A SEMtree object obtained from \code{\link{semtree}} #' @param leafs.only Default = TRUE. Only the terminal nodes (leafs) are #' printed. If set to FALSE, all node parameters are written to the @@ -20,41 +20,37 @@ #' U. (2013). Structural equation model trees. \emph{Psychological Methods}, #' 18(1), 71-86. #' @export -parameters <- function(tree, leafs.only=TRUE) { - - data <- parameters.rec(tree, leafs.only, 0) - - if (nrow(data) > 1) { - data <- round(data.frame(data[,-1], row.names=data[,1]),digits=3) - names(data) <- tree$param_names; - data <- t(data) - } else { - data <- round(data.frame(data[,-1]),digits=3) - } - - return(data); +parameters <- function(tree, leafs.only = TRUE) { + data <- parameters.rec(tree, leafs.only, 0) + + if (nrow(data) > 1) { + data <- round(data.frame(data[, -1], row.names = data[, 1]), digits = 3) + names(data) <- tree$param_names + data <- t(data) + } else { + data <- round(data.frame(data[, -1]), digits = 3) + } + + return(data) } -parameters.rec <- function(tree, leafs.only=TRUE, level=0) -{ - v <- cbind(tree$node_id, t(tree$params)); - - if (tree$caption == "TERMINAL") - { - return(v); - } - - l <- parameters.rec(tree$left_child, leafs.only, level+1); - r <- parameters.rec(tree$right_child, leafs.only, level+1); - - if(leafs.only){ - data <- rbind(l,r); +parameters.rec <- function(tree, leafs.only = TRUE, level = 0) { + v <- cbind(tree$node_id, t(tree$params)) + + if (tree$caption == "TERMINAL") { + return(v) + } + + l <- parameters.rec(tree$left_child, leafs.only, level + 1) + r <- parameters.rec(tree$right_child, leafs.only, level + 1) + + if (leafs.only) { + data <- rbind(l, r) + } + if (!leafs.only) { + data <- rbind(v, l, r) } - if(!leafs.only){ - data <- rbind(v,l,r); - } - return(data); - + return(data) } diff --git a/R/partialDependence.R b/R/partialDependence.R index b396276..1738f26 100644 --- a/R/partialDependence.R +++ b/R/partialDependence.R @@ -1,8 +1,8 @@ #' Compute partial dependence -#' +#' #' Compute the partial dependence of a predictor, or set of predictors, #' on a model parameter. -#' +#' #' @param x An object for which a method exists #' @param data Optional \code{data.frame} that was used to train the #' model. @@ -17,7 +17,7 @@ #' . Use this argument to provide specific points for which to obtain marginal #' dependence values; for example, the mean and +/- 1SD of \code{reference.var}. #' @param mc Integer. If \code{mc} is not \code{NULL}, the function will sample -#' \code{mc} number of rows from \code{data} with replacement, to estimate +#' \code{mc} number of rows from \code{data} with replacement, to estimate #' marginal dependency using Monte Carlo integration. This is less #' computationally expensive. #' @param FUN Character string with function used to integrate predictions @@ -32,15 +32,17 @@ partialDependence <- function(x, data, reference.var, support = 20, points = NUL #' @method partialDependence semforest #' @importFrom methods hasArg #' @export -partialDependence.semforest <- function(x, data, reference.var, support = 20, points = NULL, mc = NULL, FUN = "median", ...){ +partialDependence.semforest <- function(x, data, reference.var, support = 20, points = NULL, mc = NULL, FUN = "median", ...) { if (!all(reference.var %in% x$covariates)) { - ui_stop("The following predictors are not in the forest: ", - paste0(reference.var[!(reference.var %in% x$covariates)]), - ". Try any of those: ",paste0(x$covariates, collapse=","),".") + ui_stop( + "The following predictors are not in the forest: ", + paste0(reference.var[!(reference.var %in% x$covariates)]), + ". Try any of those: ", paste0(x$covariates, collapse = ","), "." + ) } cl <- match.call() cl[["x"]] <- strip(x) - if(!hasArg(data)) cl[["data"]] <- x$data + if (!hasArg(data)) cl[["data"]] <- x$data cl[[1L]] <- quote(partialDependence) # call partialDependence function on stripped semforest eval.parent(cl) @@ -49,31 +51,30 @@ partialDependence.semforest <- function(x, data, reference.var, support = 20, po #' @method partialDependence semforest_stripped #' @export #' @import data.table -partialDependence.semforest_stripped <- function(x, data, reference.var, support = 20, points = NULL, mc = NULL, FUN = "median", ...){ +partialDependence.semforest_stripped <- function(x, data, reference.var, support = 20, points = NULL, mc = NULL, FUN = "median", ...) { cl <- match.call() - cl <- cl[c(1L, which(names(cl) %in% c("data", "reference.var", "support", "points", "mc") -))] + cl <- cl[c(1L, which(names(cl) %in% c("data", "reference.var", "support", "points", "mc")))] cl[[1L]] <- str2lang("semtree:::partialDependence_data") mp <- eval.parent(cl) preds <- data.table::data.table(predict(x, data = mp, type = "pars")) - mp[,names(mp)[-which(names(mp) %in% c(reference.var, colnames(preds)))]:=NULL] + mp[, names(mp)[-which(names(mp) %in% c(reference.var, colnames(preds)))] := NULL] mp <- cbind(mp, preds) - + # Use median or quantile - pd_samples <- mp[, do.call("c", lapply(.SD, function(thiscol){ + pd_samples <- mp[, do.call("c", lapply(.SD, function(thiscol) { as.list(do.call(FUN, c(list(thiscol), list(...)))) })), by = reference.var, .SDcol = attr(x, "parameters")] - - ret <- list(samples=pd_samples, reference.var = reference.var, support = support, points = points, FUN = FUN, type = "pd") + + ret <- list(samples = pd_samples, reference.var = reference.var, support = support, points = points, FUN = FUN, type = "pd") class(ret) <- c("partialDependence", class(ret)) return(ret) } #' Compute partial dependence for latent growth models -#' +#' #' Compute the partial dependence of a predictor, or set of predictors, #' on the predicted trajectory of a latent growth model. -#' +#' #' @param x An object for which a method exists #' @param data Optional \code{data.frame} that was used to train the #' model. @@ -88,7 +89,7 @@ partialDependence.semforest_stripped <- function(x, data, reference.var, support #' . Use this argument to provide specific points for which to obtain marginal #' dependence values; for example, the mean and +/- 1SD of \code{reference.var}. #' @param mc Integer. If \code{mc} is not \code{NULL}, the function will sample -#' \code{mc} number of rows from \code{data} with replacement, to estimate +#' \code{mc} number of rows from \code{data} with replacement, to estimate #' marginal dependency using Monte Carlo integration. This is less #' computationally expensive. #' @param FUN Character string with function used to integrate predictions @@ -102,37 +103,38 @@ partialDependence.semforest_stripped <- function(x, data, reference.var, support #' @param ... Extra arguments passed to \code{FUN}. #' @author Caspar J. Van Lissa #' @export -partialDependence_growth <- function(x, data, reference.var, support = 20, points = NULL, mc = NULL, FUN = "median", times = NULL, parameters = NULL, ...){ +partialDependence_growth <- function(x, data, reference.var, support = 20, points = NULL, mc = NULL, FUN = "median", times = NULL, parameters = NULL, ...) { cl <- match.call() - cl <- cl[c(1L, which(names(cl) %in% c("data", "reference.var", "support", "points", "mc") -))] + cl <- cl[c(1L, which(names(cl) %in% c("data", "reference.var", "support", "points", "mc")))] cl[[1L]] <- str2lang("semtree:::partialDependence_data") mp <- eval.parent(cl) preds <- predict(x, data = mp, type = "pars", parameters = parameters) preds <- data.table(t(apply(preds, 1, .trajectory, L = times))) - mp[,names(mp)[-which(names(mp) %in% c(reference.var, colnames(preds)))]:=NULL] + mp[, names(mp)[-which(names(mp) %in% c(reference.var, colnames(preds)))] := NULL] mp <- cbind(mp, preds) # Use median or quantile - out <- mp[, do.call("c", lapply(.SD, function(thiscol){ + out <- mp[, do.call("c", lapply(.SD, function(thiscol) { as.list(do.call(FUN, c(list(thiscol), list(...)))) })), by = reference.var] - out <- melt(out, id.vars = reference.var, - measure.vars = names(out)[!names(out) %in% reference.var], - variable.name = "Time") - Time <- NA # TODO this is a hack to fix the CRAN check issue of "Time" + out <- melt(out, + id.vars = reference.var, + measure.vars = names(out)[!names(out) %in% reference.var], + variable.name = "Time" + ) + Time <- NA # TODO this is a hack to fix the CRAN check issue of "Time" # not being defined out[, "Time" := as.integer(as.factor(Time))] - ret <- list(samples=out, reference.var = reference.var, support = support, points = points, FUN = FUN, type = "growth") + ret <- list(samples = out, reference.var = reference.var, support = support, points = points, FUN = FUN, type = "growth") class(ret) <- c("partialDependence", class(ret)) return(ret) } #' Create dataset to compute partial dependence -#' +#' #' Create a dataset with fixed values for \code{reference.var} for all other #' values of \code{data}, or using \code{mc} random samples from \code{data} #' (Monte Carlo integration). -#' +#' #' @param data The \code{data.frame} that was used to train the #' model. #' @param reference.var Character vector, referring to the (independent) @@ -146,15 +148,15 @@ partialDependence_growth <- function(x, data, reference.var, support = 20, point #' . Use this argument to provide specific points for which to obtain marginal #' dependence values; for example, the mean and +/- 1SD of \code{reference.var}. #' @param mc Integer. If \code{mc} is not \code{NULL}, the function will sample -#' \code{mc} number of rows from \code{data} with replacement, to estimate +#' \code{mc} number of rows from \code{data} with replacement, to estimate #' @param keep_id Boolean. Default is false. Should output contain a row id column? #' marginal dependency using Monte Carlo integration. This is less #' computationally expensive. #' @author Caspar J. Van Lissa #' @export -partialDependence_data <- function(data, reference.var, support = 20, +partialDependence_data <- function(data, reference.var, support = 20, points = NULL, mc = NULL, keep_id = FALSE) { - if(is.null(points)){ + if (is.null(points)) { points <- sapply(reference.var, function(x) { seq_unif(data[[x]], length.out = support) }, simplify = FALSE) @@ -163,57 +165,56 @@ partialDependence_data <- function(data, reference.var, support = 20, points[addthese] <- sapply(addthese, function(x) { seq_unif(data[[x]], length.out = support) }, simplify = FALSE) - } points <- do.call(data.table, c(list(id = 1), expand.grid(points))) # if(length(moderator) > 0 & !is.null(mod_levels)){ # points[[which(names(points) == moderator)]] <- mod_levels # } - # + # # points <- data.table(id = 1, expand.grid(points)) - - if(!is.null(mc)){ + + if (!is.null(mc)) { int.points <- data.table(id = 1, data[sample(seq_len(nrow(data)), min(mc, nrow(data))), !colnames(data) %in% reference.var, drop = FALSE]) } else { int.points <- data.table(id = 1, data[, !colnames(data) %in% reference.var, drop = FALSE]) } - out = merge(int.points, - points, - all = TRUE, - allow.cartesian = TRUE) - + out <- merge(int.points, + points, + all = TRUE, + allow.cartesian = TRUE + ) + if (!keep_id) { - out = out[,!"id", with = FALSE] + out <- out[, !"id", with = FALSE] } - + setcolorder(out, names(data)) out } -seq_unif <- function(x, length.out){ +seq_unif <- function(x, length.out) { UseMethod("seq_unif", x) } -seq_unif.numeric <- function(x, length.out){ +seq_unif.numeric <- function(x, length.out) { seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = length.out) } -seq_unif.integer <- function (x, length.out) { - min.x = min(x, na.rm = TRUE) - max.x = max(x, na.rm = TRUE) +seq_unif.integer <- function(x, length.out) { + min.x <- min(x, na.rm = TRUE) + max.x <- max(x, na.rm = TRUE) unique_vals <- length(unique(x)) - x.length = max.x - min.x + x.length <- max.x - min.x if (length.out > unique_vals) { unique(sort(x)) - } - else { + } else { seq.int(min.x, max.x, length.out = length.out) } } -seq_unif.character <- function(x, length.out){ - x.length = length(unique(x)) +seq_unif.character <- function(x, length.out) { + x.length <- length(unique(x)) if (length.out < x.length) { warning("length.out is less than the number of unique values") } @@ -221,15 +222,13 @@ seq_unif.character <- function(x, length.out){ } seq_unif.factor <- function(x, length.out) { - x.length = length(unique(x)) + x.length <- length(unique(x)) if (length.out >= x.length) { sort(unique(x)) - } - else { + } else { if (is.ordered(x)) { unique(x)[seq_unif(seq_len(x.length), length.out)] - } - else { + } else { warning("length.out is less than the number of levels") sort(sample(unique(x), size = length.out)) } @@ -244,9 +243,9 @@ seq_unif.logical <- seq_unif.factor #' @method plot partialDependence #' @export -plot.partialDependence <- function(x, y, ...){ - switch(x$type, - "growth" = plot_growth(x$samples, ...), - plot_partialDependence(x = x, ...) - ) -} \ No newline at end of file +plot.partialDependence <- function(x, y, ...) { + switch(x$type, + "growth" = plot_growth(x$samples, ...), + plot_partialDependence(x = x, ...) + ) +} diff --git a/R/plot.diversityMatrix.R b/R/plot.diversityMatrix.R index 7526272..86f7e51 100644 --- a/R/plot.diversityMatrix.R +++ b/R/plot.diversityMatrix.R @@ -40,8 +40,10 @@ plot.diversityMatrix <- function(x, num.cluster=2, col.area = "gray", if (show.cluster.center) { for (i in 1:length(result$medoids)) { rad <- (max(x)-min(x))/20 - plotrix::draw.circle(x[result$medoids][i], - y[result$medoids][i],radius=rad,col=col.medoids[i]) + + graphics::symbols(x=x[result$medoids][i], + y= y[result$medoids][i], + circles = rad, add=TRUE, bg=col.medoids[i], inches=FALSE) text(x[result$medoids][i], y[result$medoids][i], labels = i) } diff --git a/R/plot.partialDependence.R b/R/plot.partialDependence.R index 0dcb481..71f8e20 100644 --- a/R/plot.partialDependence.R +++ b/R/plot.partialDependence.R @@ -4,29 +4,32 @@ plot_partialDependence <- type = "l", xlab = NULL, ylab = NULL, - ...) - { + ...) { if (is.null(parameter)) { stop("Please specify argument 'parameter'!") } - - num_reference_vars = length(x$reference.var) - + + num_reference_vars <- length(x$reference.var) + if (num_reference_vars == 1) { if (is.null(xlab)) { xlab <- x$reference.var } - + if (is.null(ylab)) { ylab <- parameter } - - is_fac = is.factor(x$samples[[x$reference.var]]) - + + is_fac <- is.factor(x$samples[[x$reference.var]]) + if (!is_fac) { - ggplot2::ggplot(x$samples, - ggplot2::aes_string(x = x$reference.var, y = parameter)) + - ggplot2::geom_line() + ggplot2::theme_light() + ggplot2::ggtitle("Partial Dependence Plot") + ggplot2::ggplot( + x$samples, + ggplot2::aes_string(x = x$reference.var, y = parameter) + ) + + ggplot2::geom_line() + + ggplot2::theme_light() + + ggplot2::ggtitle("Partial Dependence Plot") } else { ggplot2::ggplot( x$samples, @@ -37,25 +40,28 @@ plot_partialDependence <- ) ) + ggplot2::geom_bar(stat = "identity") + - ggplot2::theme_light() + ggplot2::ggtitle("Partial Dependence Plot") + ggplot2::theme_light() + + ggplot2::ggtitle("Partial Dependence Plot") } - } else if (num_reference_vars == 2) { - is_fac1 = is.factor(x$samples[[x$reference.var[1]]]) - is_fac2 = is.factor(x$samples[[x$reference.var[2]]]) - + is_fac1 <- is.factor(x$samples[[x$reference.var[1]]]) + is_fac2 <- is.factor(x$samples[[x$reference.var[2]]]) + if (is_fac1 && !is_fac2) { # swap factor to second position - is_fac1 = is_fac2 - is_fac2 = TRUE - temp = x$reference.var[1] - x$reference.var[1] = x$reference.var[2] - x$reference.var[2] = temp + is_fac1 <- is_fac2 + is_fac2 <- TRUE + temp <- x$reference.var[1] + x$reference.var[1] <- x$reference.var[2] + x$reference.var[2] <- temp } - + if (!is_fac1) { - gp <- ggplot2::ggplot(x$samples, - ggplot2::aes_string(x = x$reference.var[1], y = parameter)) + - ggplot2::geom_line() + ggplot2::theme_light() + + gp <- ggplot2::ggplot( + x$samples, + ggplot2::aes_string(x = x$reference.var[1], y = parameter) + ) + + ggplot2::geom_line() + + ggplot2::theme_light() + ggplot2::ggtitle("Partial Dependence Plot") } else { gp <- ggplot2::ggplot( @@ -67,15 +73,16 @@ plot_partialDependence <- ) ) + ggplot2::geom_bar(stat = "identity") + - ggplot2::theme_light() + ggplot2::ggtitle("Partial Dependence Plot") + ggplot2::theme_light() + + ggplot2::ggtitle("Partial Dependence Plot") } - + if (is_fac2) { gp <- gp + ggplot2::facet_wrap(x$reference.var[2]) } else { ui_stop("Plots are currently only supported if at least one reference variable is a factor.") } - + return(gp) } else { ui_stop("Plots are supported with only up to 2 reference variables.") diff --git a/R/plot.semforest.varimp.R b/R/plot.semforest.varimp.R index d21c08b..39802d5 100644 --- a/R/plot.semforest.varimp.R +++ b/R/plot.semforest.varimp.R @@ -16,7 +16,7 @@ plot.semforest.varimp <- vimp <- x - if (hasName(vimp,"boruta")) { + if (utils::hasName(vimp,"boruta")) { filter_ids <- 1:(ncol(vimp$importance)/2) vimp$var.names <- vimp$var.names[filter_ids] vimp$importance <- vimp$importance[,filter_ids] @@ -105,7 +105,7 @@ plot.semforest.varimp <- par(mai = c(1.02, linch, 0.82, 0.42)) col <- NULL - if (hasName(vimp,"boruta")) { + if (utils::hasName(vimp,"boruta")) { col <- ifelse(vimp$filter,"grey","white") } @@ -119,8 +119,8 @@ plot.semforest.varimp <- col = NULL, ... ) - if (hasName(vimp,"boruta")) { - abline(v=vimp$boruta_threshold,lwd=2) + if (utils::hasName(vimp,"boruta")) { + graphics::abline(v=vimp$boruta_threshold,lwd=2) } } diff --git a/R/plot.semtree.R b/R/plot.semtree.R index ab75355..223660c 100644 --- a/R/plot.semtree.R +++ b/R/plot.semtree.R @@ -5,33 +5,33 @@ plot.semtree <- function(x, ...) { if (is.null(x)) { cat("Argument is not a SEM tree!") - + return(NULL) - } - + tree <- x - + to.rpart.rec <- function(x, xx) { if (is.null(xx)) { num <- 0 level <- 1 } else { - num <- sum(2 ^ (length(xx):1 - 1) * xx) - level <- 2 ** length(xx) + num <- sum(2^(length(xx):1 - 1) * xx) + level <- 2**length(xx) } - + num <- num + level - + data <- c() - + # var n wt dev yval complexity ncompete nsurrogate if (x$caption == "TERMINAL") { # # create a data row for a terminal node # row <- - c("", + c( + "", x$N, x$N, NA, @@ -41,46 +41,44 @@ plot.semtree <- function(x, 0, num, paste(paste( - x$param_names, "=" , round(x$params, 3) - ) , collapse = "\n"), - "") + x$param_names, "=", round(x$params, 3) + ), collapse = "\n"), + "" + ) data <- rbind(data, row) - } else { # # create data row for a decision node # - + # -- prepare statistic and/or p-value - #crit <- "?" + # crit <- "?" # options("scipen"=5) - #if (tree$p.values.valid) { + # if (tree$p.values.valid) { # if(x$p<.001){crit <- paste("p<0.001")} # else{crit <- paste("p=",round(x$p,3),sep="")} - #} else { + # } else { crit <- paste("LR=", round(x$lr, 1), "(df=", x$df, ")", sep = "") - #} - + # } + row <- c(x$caption, x$N, x$N, 0, x$node_id, 0, 0, 0, num, NA, crit) data <- rbind(data, row) row <- to.rpart.rec(x$left_child, append(xx, 0)) - + data <- rbind(data, row) row <- to.rpart.rec(x$right_child, append(xx, 1)) data <- rbind(data, row) - } return(data) - } - - + + l <- list() data <- to.rpart.rec(x, NULL) - - + + l$frame <- data l$frame <- data.frame(l$frame, row.names = l$frame[, 9]) names(l$frame) <- @@ -97,65 +95,69 @@ plot.semtree <- function(x, "estimates", "crit" ) - - - + + + l$frame[, 1] <- as.factor(l$frame[, 1]) l$frame$dev <- as.numeric(as.character(l$frame$dev)) - #l$frame$yval <- as.numeric(as.character(l$frame$yval)) + # l$frame$yval <- as.numeric(as.character(l$frame$yval)) l$frame$yval <- as.numeric(as.character(l$frame$yval)) l$frame$wt <- as.numeric(as.character(l$frame$wt)) l$frame$complexity <- as.numeric(as.character(l$frame$complexity)) l$frame$n <- as.numeric(as.character(l$frame$n)) l$frame$ncompete <- as.numeric(l$frame$ncompete) l$frame$estimates <- as.character(l$frame$estimates) - + l$frame$nsurrogate <- as.numeric(l$frame$nsurrogate) - - l$method <- "anova" #"semtree" - + + l$method <- "anova" # "semtree" + # kindly borrowed from rpart formatg <- - function (x, - digits = getOption("digits"), - format = paste0("%.", - digits, "g")) - { - if (!is.numeric(x)) + function(x, + digits = getOption("digits"), + format = paste0( + "%.", + digits, "g" + )) { + if (!is.numeric(x)) { stop("'x' must be a numeric vector") + } temp <- sprintf(format, x) - if (is.matrix(x)) + if (is.matrix(x)) { matrix(temp, nrow = nrow(x)) - else + } else { temp + } } - - + + l$functions$summary <- - function (yval, dev, wt, ylevel, digits) - { + function(yval, dev, wt, ylevel, digits) { paste(" mean=", - formatg(yval, digits), - ", MSE=", - formatg(dev / wt, - digits), - sep = "") + formatg(yval, digits), + ", MSE=", + formatg( + dev / wt, + digits + ), + sep = "" + ) } - - + + l$functions$text <- - function (yval, dev, wt, ylevel, digits, n, use.n) - { - #if (use.n) { + function(yval, dev, wt, ylevel, digits, n, use.n) { + # if (use.n) { paste("#", yval, ", N=", n, sep = "") - #} - #else { + # } + # else { # paste(formatg(yval, digits)) - #} + # } } - + class(l) <- "rpart" - + if (no.plot) { return(l) } else { @@ -169,5 +171,4 @@ plot.semtree <- function(x, ... ) } - } diff --git a/R/plot.varimpConvergence.R b/R/plot.varimpConvergence.R index 3f37211..70cd470 100644 --- a/R/plot.varimpConvergence.R +++ b/R/plot.varimpConvergence.R @@ -8,11 +8,13 @@ nacummedian <- function(x) { sapply(1:length(x), function(xx){median(x[1:xx])}) } -plot.varimpConvergence <- function( vim, lty=NULL, idx=NULL, +plot.varimpConvergence <- function( x, lty=NULL, idx=NULL, legend.x="topright", clw=4, legend.cex=1.5,ylim=NULL, ...) { + vim <- x + impsum <- apply(vim$importance, 2, nacumsum) M <- ncol(impsum) diff --git a/R/plotParDiffForest.R b/R/plotParDiffForest.R index 36b59e1..9ae77ea 100644 --- a/R/plotParDiffForest.R +++ b/R/plotParDiffForest.R @@ -6,7 +6,7 @@ #' are "boxplot" (default) and "jitter" for a jittered strip plot with mean and #' standard deviation. #' @param measure a character. "wald" (default) gives the squared parameter -#' differences devided by their pooled standard errors. "test" gives the +#' differences divided by their pooled standard errors. "test" gives the #' contributions of the parameters to the test statistic. "raw" gives the #' absolute values of the parameter differences. #' @param normalize logical value; if TRUE parameter differences of each split diff --git a/R/plotParDiffTree.R b/R/plotParDiffTree.R index 43090c6..63fba6f 100644 --- a/R/plotParDiffTree.R +++ b/R/plotParDiffTree.R @@ -5,7 +5,7 @@ #' @param plot a character that specifies the plot typ. Available plot types #' are "ballon" (default), "heatmap", and "bar". #' @param measure a character. "wald" (default) gives the squared parameter -#' differences devided by their pooled standard errors. "test" gives the +#' differences divided by their pooled standard errors. "test" gives the #' contributions of the parameters to the test statistic. "raw" gives the #' absolute values of the parameter differences. #' @param normalize logical value; if TRUE parameter differences of each split diff --git a/R/print.semforest.R b/R/print.semforest.R index 123ceb7..29395c3 100644 --- a/R/print.semforest.R +++ b/R/print.semforest.R @@ -1,27 +1,21 @@ #' @exportS3Method print semforest -print.semforest <- function(x, ...) -{ - invalid.trees <- sum(sapply(x$forest,FUN=is.null)) +print.semforest <- function(x, ...) { + invalid.trees <- sum(sapply(x$forest, FUN = is.null)) - cat(paste("SEM forest with ",length(x$forest)," trees.","\n")) - if (invalid.trees > 0) { - cat(paste("Of these trees, ",invalid.trees," trees are invalid due to errors.\n")) - } - - + cat(paste("SEM forest with ", length(x$forest), " trees.", "\n")) + if (invalid.trees > 0) { + cat(paste("Of these trees, ", invalid.trees, " trees are invalid due to errors.\n")) + } } #' @exportS3Method print semforest_stripped -print.semforest_stripped <- function(x, ...) -{ - invalid.trees <- sum(sapply(x$forest,FUN=is.null)) - - - cat(paste("SEM forest [stripped] with ",length(x$forest)," trees.","\n")) +print.semforest_stripped <- function(x, ...) { + invalid.trees <- sum(sapply(x$forest, FUN = is.null)) + + + cat(paste("SEM forest [stripped] with ", length(x$forest), " trees.", "\n")) if (invalid.trees > 0) { - cat(paste("Of these trees, ",invalid.trees," trees are invalid due to errors.\n")) + cat(paste("Of these trees, ", invalid.trees, " trees are invalid due to errors.\n")) } - - -} \ No newline at end of file +} diff --git a/R/print.semtree.R b/R/print.semtree.R index 89ba674..2a3791f 100644 --- a/R/print.semtree.R +++ b/R/print.semtree.R @@ -3,38 +3,33 @@ print.semtree <- function(x, level = 0, p.values.valid = NULL, - ...) - { + ...) { tree <- x - + indent <- paste(rep("| ", level), collapse = "", sep = "") - + if (level > 0) { edge_label <- tree$edge_label } else { edge_label <- "ROOT" - } - + caption <- tree$caption - - if (caption == "TERMINAL") - { - #caption <- paste(caption,"(ID=",tree$node_id,")",sep="") + + if (caption == "TERMINAL") { + # caption <- paste(caption,"(ID=",tree$node_id,")",sep="") output <- paste(indent, - "|-[", - tree$node_id, - "] ", - caption, - " [N=", - tree$N, - "]\n", - sep = "") - - } - - else { + "|-[", + tree$node_id, + "] ", + caption, + " [N=", + tree$N, + "]\n", + sep = "" + ) + } else { output <- paste( indent, @@ -52,28 +47,23 @@ print.semtree <- "]\n", sep = "" ) - } - - if (tree$caption != "TERMINAL") - { + + if (tree$caption != "TERMINAL") { output <- paste( output, print.semtree(tree$left_child, level + 1, p.values.valid), print.semtree(tree$right_child, level + 1, p.values.valid) ) - } - + if (level == 0) { output <- paste("SEMtree with numbered nodes\n", output) - + cat(output) - } else { return(output) - } } diff --git a/R/prune.semforest.R b/R/prune.semforest.R index f7387f3..20af8cc 100644 --- a/R/prune.semforest.R +++ b/R/prune.semforest.R @@ -1,4 +1,4 @@ -#' @exportS3Method prune semtree +#' @exportS3Method prune semforest prune.semforest <- function(object, max.depth=NULL, num.trees=NULL, ...) { if (!is.null(num.trees)) { diff --git a/R/recodeAllSubsets.R b/R/recodeAllSubsets.R index fcc67a3..464d7f7 100644 --- a/R/recodeAllSubsets.R +++ b/R/recodeAllSubsets.R @@ -31,8 +31,7 @@ recodeAllSubsets <- complement <- c() for (j in 1:length(values_set)) { - if (bitops::bitAnd(i, 2 ** (j - 1)) > 0) { - #print(values_set[j]) + if (floor(i / 2^(j-1)) %% 2 > 0) { temp_set <- append(temp_set, values_set[j]) } else { complement <- append(complement, values_set[j]) diff --git a/R/scoreSplit.R b/R/scoreSplit.R index b9179d5..e845dea 100644 --- a/R/scoreSplit.R +++ b/R/scoreSplit.R @@ -37,9 +37,18 @@ ScoreSplit <- function(model = NULL, mydata = NULL, control = NULL, n_obs <- nobs(model) # get covariance matrix of the model parameters - vcov. <- solve(vcov_semtree(model) * n_obs) - vcov. <- strucchange::root.matrix(vcov.) + vcov. <- tryCatch({ + solve(vcov_semtree(model) * n_obs) + }, error=function(e){ + ui_fail("An error occured inverting the vcov model matrix when computing scores! Nobs=",n_obs," Aborting.") + NULL + }) + + if (is.null(vcov.)) { + return(NULL) + } + vcov. <- strucchange::root.matrix(vcov.) ############################################ # main loop with calls to sctest_semtree() # diff --git a/R/semforest.R b/R/semforest.R index b5d86fe..19fc683 100644 --- a/R/semforest.R +++ b/R/semforest.R @@ -114,6 +114,13 @@ semforest <- function(model, } + # set mtry heuristically if not set manually + if (is.null(semforest.control$mtry)) { + num_covariates <- length(covariate.ids) + mtry <- ceiling(sqrt(num_covariates)) + ui_message("Setting mtry = ",mtry," based on ",num_covariates," predictors.\n") + } + # pass mtry from forest to tree control if (!is.na(semforest.control$semtree.control$mtry)) { ui_stop( diff --git a/R/semforest.control.R b/R/semforest.control.R index b1cba14..0238afe 100644 --- a/R/semforest.control.R +++ b/R/semforest.control.R @@ -4,7 +4,7 @@ #' algorithm. #' #' -#' @aliases semforest.control print.semforest.control semforest_score_control +#' @aliases semforest.control semforest_control print.semforest.control semforest_score_control #' @param num.trees Number of trees. #' @param sampling Sampling procedure. Can be subsample or bootstrap. #' @param control A SEM Tree control object. Will be generated by default. @@ -36,7 +36,7 @@ semforest.control <- options$semtree.control$exclude.heywood <- FALSE } else { # 1.9.2022: switch refit to TRUE - if (isFALSE(control$refit)) { + if (base::isFALSE(control$refit)) { warning("refit = FALSE detected. Models in root nodes require estimation for forests. Set refit to TRUE") control$refit <- TRUE } @@ -55,3 +55,9 @@ semforest_score_control <- function(...) return(ctrl) } + +#' @export +semforest_control <- function(...) +{ + semforest.control(...) +} diff --git a/R/semtree-package.R b/R/semtree-package.R index 736c411..06f01e3 100644 --- a/R/semtree-package.R +++ b/R/semtree-package.R @@ -1,4 +1,5 @@ #' @title SEM Tree Package +#' @name semtree-package #' @importFrom stats as.formula predict #' @importFrom lavaan lavScores nobs vcov #' @importFrom utils toLatex @@ -6,18 +7,14 @@ #' @importFrom strucchange catL2BB maxBB meanL2BB ordL2BB ordwmax root.matrix sctest supLM #' @importFrom sandwich bread #' @importFrom methods is -#' @importFrom sets as.set #' @importFrom parallel parLapply clusterMap #' @importFrom utils flush.console getS3method sessionInfo str setTxtProgressBar data #' @importFrom stats as.dist cmdscale coef cor cov logLik median pchisq qnorm runif var dist rnorm #' @importFrom graphics barplot legend lines pairs par plot strwidth text hist #' @import OpenMx -#' @importFrom bitops bitAnd #' @import rpart #' @importFrom methods hasArg #' @importFrom data.table data.table -NULL - .SCALE_METRIC = 2 .SCALE_ORDINAL = 3 .SCALE_CATEGORICAL = 1 @@ -42,49 +39,7 @@ NULL -#' Merge two SEM forests -#' -#' This overrides generic base::merge() to merge two forests into one. -#' -#' -#' @aliases merge.semforest -#' @param x A SEM Forest -#' @param y A second SEM Forest -#' @param list() Extra arguments. Currently unused. -#' @author Andreas M. Brandmaier, John J. Prindle -#' @seealso \code{\link{semtree}} -#' @references Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, -#' U. (2013). Structural equation model trees. \emph{Psychological Methods}, -#' 18(1), 71-86. -NULL - - - - -#' SEMtrees Parameter Estimates Standard Error Table -#' -#' Returns a table of standard errors with columns corresponding to freely -#' estimated standard errors and rows corresponding to nodes in the tree. -#' -#' The row names of the resulting data frame correspond to internal node ids -#' and the column names correspond to standard errors in the SEM. Parameter -#' estimates can be obtained from \code{\link{parameters}}. -#' -#' @aliases se -#' @param tree A SEMtree object obtained from \code{\link{semtree}} -#' @param leafs.only Default = TRUE. Only the terminal nodes (leafs) are -#' printed. If set to FALSE, all node standard errors are written to the -#' \code{data.frame}. -#' @return Returns a \code{data.frame} with rows for parameters and columns for -#' terminal nodes. -#' @author Andreas M. Brandmaier, John J. Prindle -#' @seealso \code{\link{semtree}}, \code{\link{semtree.control}}, -#' \code{\link{parameters}} -#' @references Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, -#' U. (2013). Structural equation model trees. \emph{Psychological Methods}, -#' 18(1), 71-86. -NULL diff --git a/R/semtree.R b/R/semtree.R index 948cf7c..1bd434c 100644 --- a/R/semtree.R +++ b/R/semtree.R @@ -1,31 +1,31 @@ #' SEM Tree: Recursive Partitioning for Structural Equation Models -#' +#' #' Structural equation model (SEM) trees are a combination of SEM and decision #' trees (also known as classification and regression trees or recursive #' partitioning). SEM trees hierarchically split empirical data into #' homogeneous groups sharing similar data patterns with respect to a SEM by #' recursively selecting optimal predictors of these differences from a #' potentially large set of predictors. -#' +#' #' Calling \code{semtree} with an \code{\link{OpenMx}} or #' \code{\link[lavaan]{lavaan}} model creates a tree that recursively #' partitions a dataset such that the partitions maximally differ with respect #' to the model-predicted distributions. Each resulting subgroup (represented #' as a leaf in the tree) is represented by a SEM with a distinct set of #' parameter estimates. -#' +#' #' Predictors (yet unmodeled variables) can take on any form for the splitting #' algorithm to function (categorical, ordered categories, continuous). Care #' must be taken in choosing how many predictors to include in analyses because #' as the number of categories grows for unordered categorical variables, the #' number of multigroup comparisons increases exponentially for unordered #' categories. -#' +#' #' Currently available evaluation methods for assessing partitions: -#' +#' #' 1. "naive" selection method compares all possible split values to one #' another over all predictors included in the dataset. -#' +#' #' 2. "fair" selection uses a two step procedure for analyzing split values on #' predictors at each node of the tree. The first phase uses half of the sample #' to examine the model improvement for each split value on each predictor, and @@ -34,14 +34,14 @@ #' predictor on the second half of the sample. The best improvement for the c #' splits tested on c predictors is selected for the node and the dataset is #' split from this node for further testing. -#' +#' #' 3. "score" uses score-based test statistics. These statistics are much #' faster than the classic SEM tree approach while having favorable -#' statistical properties. -#' +#' statistical properties. +#' #' All other parameters controlling the tree growing process are available #' through a separate \code{\link{semtree.control}} object. -#' +#' #' @aliases semtree plot.semtree print.semtree summary.semtree toLatex.semtree #' nodeFunSemtree #' @param model A template model specification from \code{\link{OpenMx}} using @@ -75,20 +75,27 @@ #' \code{\link{parameters}}, \code{\link{se}}, \code{\link{prune.semtree}}, #' \code{\link{subtree}}, \code{\link[OpenMx]{OpenMx}}, #' \code{\link[lavaan]{lavaan}} -#' @references +#' @references #' Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, U. (2013). Structural equation model trees. \emph{Psychological Methods}, 18(1), 71-86. -#' @references +#' @references #' Arnold, M., Voelkle, M. C., & Brandmaier, A. M. (2021). Score-guided structural equation model trees. \emph{Frontiers in Psychology}, 11, Article 564403. https://doi.org/10.3389/fpsyg.2020.564403 #' #' @keywords tree models multivariate -#' +#' #' @export -semtree <- function(model, data=NULL, control=NULL, constraints=NULL, - predictors = NULL, ...) { +semtree <- function(model, data = NULL, control = NULL, constraints = NULL, + predictors = NULL, ...) { + + # some checks on the data + if (!is.null(data)) { + if (!is.data.frame(data)) { + stop("Error with 'data' argument: semtree currently only supports data frames.") + } + } # TODO: change this throughout dataset <- data - + # obtain dots arguments and test for deprecated use of arguments arguments <- list(...) if ("global.constraints" %in% names(arguments)) { @@ -97,255 +104,313 @@ semtree <- function(model, data=NULL, control=NULL, constraints=NULL, if ("invariance" %in% names(arguments)) { stop("Deprecated use of argument 'invariance'. Please use constraints object with property 'local.invariance'") } - + if (is.null(constraints)) { constraints <- semtree.constraints() } - + covariates <- predictors - + # backwards-compatibility - if ( (!is.null(arguments)) & ("covariates" %in% names(arguments)) ) { + if ((!is.null(arguments)) & ("covariates" %in% names(arguments))) { if (is.null(predictors)) { - #report(paste("Setting arguments to ",paste(arguments$covariates)),1) + # report(paste("Setting arguments to ",paste(arguments$covariates)),1) covariates <- arguments$covariates } else { stop("Cannot have both arguments 'predictors' and 'covariates' in SEM Tree model.") } } - - - + + + invariance <- constraints$local.invariance global.constraints <- constraints$global.invariance - - - + + + # create default control object, if not specified if (is.null(control)) { control <- semtree.control() - if (control$verbose) + if (control$verbose) { ui_message("Default SEMtree settings established since no Controls provided.") + } } else { - if (checkControl(control)!=TRUE) {stop( "Unknown options in semtree.control object!");} + if (checkControl(control) != TRUE) { + stop("Unknown options in semtree.control object!") + } } + + # here we decide between four cases depending + # on whether min.N is given and/or min.bucket is given + # this is a really dumb heuristic + # please can someone replace this with something more useful + # this based on (Bentler & Chou, 1987; see also Bollen, 1989) - if (control$method=="cv") { - ui_stop("This method ceased to exist. Please see modern score-based tests.") + if (is.null(control$min.N)) { + + if (is.null(control$min.bucket)) { + # both values were not specified + control$min.N <- max(20, 5 * npar(model)) + control$min.bucket <- max(10, control$min.N / 2) + } else { + # only min.bucket was given, min.N was not specified + control$min.N <- control$min.bucket * 2 + } + } else { + if (is.null(control$min.bucket)) { + # only min.N was given, min.bucket was not specified + control$min.bucket <- max(10, control$min.N / 2) + } else { + # do nothing, both values were specified + if (control$min.bucket > control$min.N) { + warning("Min.bucket parameter should probably be smaller than min.N!") + } + } } + if (is.null(control$min.N)) { + + } + + # set min.bucket and min.N heuristically + if (is.null(control$min.bucket)) { + + } + + if (control$method == "cv") { + ui_stop("This method ceased to exist. Please see modern score-based tests.") + } + # check whether data is complete for score-tests # this probably should be a more fine-grained check some day # that tests only model variables and selected predictors if (control$method == "score") { - check_complete = all(stats::complete.cases(data)) - if (!check_complete) + check_complete <- all(stats::complete.cases(data)) + if (!check_complete) { ui_stop("If score tests are used, data must not contain N/A in either the predictors or model variables.") + } } - + # check for correct model entry - if (inherits(model,"MxModel") || inherits(model,"MxRAMModel")) { - if (control$verbose) { message("Detected OpenMx model.") } - control$sem.prog = "OpenMx" - } else if (inherits(model,"lavaan")){ - #if (control$verbose) { ui_message("Detected lavaan model.") } - control$sem.prog = "lavaan" - } else if ((inherits(model,"ctsemFit")) || (inherits(model,"ctsemInit"))) { - #if (control$verbose) { ui_message("Detected ctsem model.") } - control$sem.prog = "ctsem" + if (inherits(model, "MxModel") || inherits(model, "MxRAMModel")) { + if (control$verbose) { + message("Detected OpenMx model.") + } + control$sem.prog <- "OpenMx" + } else if (inherits(model, "lavaan")) { + # if (control$verbose) { ui_message("Detected lavaan model.") } + control$sem.prog <- "lavaan" + } else if ((inherits(model, "ctsemFit")) || (inherits(model, "ctsemInit"))) { + # if (control$verbose) { ui_message("Detected ctsem model.") } + control$sem.prog <- "ctsem" + + ctsemomx_omx_installed <- "ctsemOMX" %in% utils::installed.packages()[,"Package"] + if (!ctsemomx_omx_installed) { + stop("Please install ctsemOMX first.") + } + } else { - ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!"); + ui_stop("Unknown model type selected. Use OpenMx or lavaanified lavaan models!") } - + # set the mtry value to default=0 if not set if (is.na(control$mtry)) control$mtry <- 0 - - + + # some checks if (!is.null(constraints$focus.parameters)) { - if (control$sem.prog != "OpenMx") { ui_stop("Focus parameters are only supported with OpenMx!") } - - num.match <- length(constraints$focus.parameters %in% - OpenMx::omxGetParameters(model)) + + num.match <- length(constraints$focus.parameters %in% + OpenMx::omxGetParameters(model)) if (num.match != length(constraints$focus.parameters)) { ui_stop("Error! Not all focus parameters are free parameters in the model!") } } - + # add data to model if not already done and sort covariates from model variables ########################################################### ### OPENMX USED HERE ### ########################################################### - if((control$sem.prog=='OpenMx') || (control$sem.prog=='ctsem')){ - - if ((control$sem.prog=='ctsem')) { + if ((control$sem.prog == "OpenMx") || (control$sem.prog == "ctsem")) { + if ((control$sem.prog == "ctsem")) { ## 11.08.2022: check data format. Currently, only wide format is supported. - if (all(is.na(match(paste0(model$ctmodelobj$manifestNames, "_T0"), - colnames(dataset))))) { + if (all(is.na(match( + paste0(model$ctmodelobj$manifestNames, "_T0"), + colnames(dataset) + )))) { stop("Long format data detected. Data need to be in wide format.") # Check if the model unsupported components # to be done - } - - model$mxobj@manifestVars <- paste0(model$ctmodelobj$manifestNames, "_T", - rep(0:(model$ctmodelobj$Tpoints - 1), - each = model$ctmodelobj$n.manifest)) + } + + model$mxobj@manifestVars <- paste0( + model$ctmodelobj$manifestNames, "_T", + rep(0:(model$ctmodelobj$Tpoints - 1), + each = model$ctmodelobj$n.manifest + ) + ) mxmodel <- model$mxobj } else { mxmodel <- model } - - if(is.null(dataset)) { + + if (is.null(dataset)) { if (is.null(mxmodel@data)) { stop("MxModel has no data associated!") } dataset <- mxmodel@data@observed } - + # sanity check if (any(!(covariates %in% names(dataset)))) { stop( - paste("Some of the specified predictors are not in the dataset provided: ", - paste(covariates[ (!(covariates %in% names(dataset)))],sep="",collapse = ",") - )) + paste( + "Some of the specified predictors are not in the dataset provided: ", + paste(covariates[(!(covariates %in% names(dataset)))], sep = "", collapse = ",") + ) + ) } - + tmp <- getPredictorsOpenMx(mxmodel, dataset, covariates) model.ids <- tmp[[1]] covariate.ids <- tmp[[2]] - + # check whether character columns are given as predictors for (i in covariate.ids) { - if (!is.factor(dataset[,i]) && !is.numeric(dataset[,i])) { + if (!is.factor(dataset[, i]) && !is.numeric(dataset[, i])) { # this column is neither numeric or a factor, thus cannot be handled # probably a vector of strings - ui_stop("Predictor '", colnames(dataset)[i],"' is neither a factor nor numeric. This is likely causing trouble. Please remove or specify as factor or ordered.") + ui_stop("Predictor '", colnames(dataset)[i], "' is neither a factor nor numeric. This is likely causing trouble. Please remove or specify as factor or ordered.") } } - + # check whether numeric covariates have more than 9 observed values # if score-tests are used, otherwise score statistics can become # unstable - if (control$method=="score") { - for (i in covariate.ids) { - if (!is.factor(dataset[,i]) && is.numeric(dataset[,i])) { - # this column is numeric, should have more than 9 unique values! - check_9levels = length(unique(dataset[,i]))>9 - if (!check_9levels) - ui_warn("Predictor '", colnames(dataset)[i],"' has 9 or fewer unique values. Consider coding as ordinal to avoid instability with score-based tests.") + if (control$method == "score") { + for (i in covariate.ids) { + if (!is.factor(dataset[, i]) && is.numeric(dataset[, i])) { + # this column is numeric, should have more than 9 unique values! + check_9levels <- length(unique(dataset[, i])) > 9 + if (!check_9levels) { + ui_warn("Predictor '", colnames(dataset)[i], "' has 9 or fewer unique values. Consider coding as ordinal to avoid instability with score-based tests.") + } + } } } - } - + # 15.08.2022: all OpenMx models are estimated here if not already estimated ## ctsem are already estimated once - if (control$sem.prog == 'OpenMx' && !summary(model)$wasRun) { - ui_message("Model was not run. Estimating parameters now.") - suppressMessages(model <- OpenMx::mxTryHard(model = model, paste=FALSE, silent = TRUE)) + if (control$sem.prog == "OpenMx" && !summary(model)$wasRun) { + ui_message("Model was not run. Estimating parameters now.") + suppressMessages(model <- OpenMx::mxTryHard(model = model, paste = FALSE, silent = TRUE)) } - - + + # Prepare objects for fast score calculation - ## Only for linear models (semtree$linear == TRUE) or for models with definition variables + ## Only for linear models (semtree$linear == TRUE) or for models with definition variables # Note: model must be run - this is assured by previous code block that performs mxTryHard() - if (control$method == "score" & control$sem.prog == 'OpenMx') { - control <- c(control, - list(scores_info = OpenMx_scores_input(x = model, - control = control))) - } - - - + if (control$method == "score" & control$sem.prog == "OpenMx") { + control <- c( + control, + list(scores_info = OpenMx_scores_input( + x = model, + control = control + )) + ) + } } - + ########################################################### ### lavaan USED HERE ### ########################################################### - if(control$sem.prog=='lavaan'){ - if(is.null(dataset)) { + if (control$sem.prog == "lavaan") { + if (is.null(dataset)) { ui_stop("Must include data for analysis!") } - + tmp <- getPredictorsLavaan(model, dataset, covariates) model.ids <- tmp[[1]] covariate.ids <- tmp[[2]] - } - + meta <- list() meta$model.ids <- model.ids meta$covariate.ids <- covariate.ids - + # init unique node counter - # assign("global.node.id",1,envir = getSemtreeNamespace()) + # assign("global.node.id",1,envir = getSemtreeNamespace()) # TODO: is there a better way to assign ids? - setGlobal("global.node.id",1) - - #create default constraints if none specified for invariance testing of nested models + setGlobal("global.node.id", 1) + + # create default constraints if none specified for invariance testing of nested models if (is.null(invariance)) { - invariance <- NULL - } - else { + invariance <- NULL + } else { if (control$method != "naive") { ui_message("Invariance is only implemented for naive variable selection.") return(NULL) } - if(is.na(control$alpha.invariance)){ + if (is.na(control$alpha.invariance)) { ui_message("No Invariance alpha selected. alpha.invariance set to:", control$alpha) - control$alpha.invariance<-control$alpha} - - if(is(invariance, "character")) { - invariance <- list(invariance) - } else { - if (!is(invariance, "list")) { - ui_stop("Invariance must contain an array of parameter names or a list of such arrays.") - } - } + control$alpha.invariance <- control$alpha + } + if (is(invariance, "character")) { + invariance <- list(invariance) + } else { + if (!is(invariance, "list")) { + ui_stop("Invariance must contain an array of parameter names or a list of such arrays.") + } + } } - + # heuristic checks whether variables are correctly coded # to avoid problems in the computation of test statistics for (cid in covariate.ids) { - column <- dataset[, cid] + column <- dataset[, cid] if (is.numeric(column)) { - if (length(unique(column))<=10) { ui_warn("Variable ",names(dataset)[cid]," is numeric but has only few unique values. Consider recoding as ordered factor." )} + if (length(unique(column)) <= 10) { + ui_warn("Variable ", names(dataset)[cid], " is numeric but has only few unique values. Consider recoding as ordered factor.") + } } } - + # check for no missing data in covariates if score statistics are used if (control$method == "score") { for (cid in covariate.ids) { - column <- dataset[, cid] - if (sum(is.na(column))>0) { ui_stop("Variable ",names(dataset)[cid]," has missing values. Computation of score statistic not possible."); return(NULL); } - } + column <- dataset[, cid] + if (sum(is.na(column)) > 0) { + ui_stop("Variable ", names(dataset)[cid], " has missing values. Computation of score statistic not possible.") + return(NULL) + } + } } - - + + # correct method selection check - method.int <- pmatch(control$method, c("cv","naive","fair","fair3","score")) + method.int <- pmatch(control$method, c("cv", "naive", "fair", "fair3", "score")) if (is.na(method.int)) { ui_stop("Unknown method in control object! Try either 'naive', 'fair', 'fair3', 'score', or 'cv'.") - } - + } + # if this is still null, no data was given if (is.null(dataset)) { ui_stop("No data were provided!") } - + # sanity checks, duplicated col names? - if (any(duplicated(names(dataset)))) - { + if (any(duplicated(names(dataset)))) { ui_stop("Dataset contains duplicated columns names!") } - + # set a seed for user to repeat results if no seed provided - if (!is.null(control$seed)&!is.na(control$seed)){ + if (!is.null(control$seed) & !is.na(control$seed)) { set.seed(control$seed) } ########################################################### @@ -353,57 +418,61 @@ semtree <- function(model, data=NULL, control=NULL, constraints=NULL, ########################################################### # global constraints - estimate once and then regarded fixed in the tree if (!is.null(global.constraints)) { - if (control$sem.prog != "OpenMx") { ui_stop("Global constraints are not yet supported!") } - - run.global <- OpenMx::mxRun(model, silent=TRUE, useOptimizer=TRUE, suppressWarnings=TRUE); + + run.global <- OpenMx::mxRun(model, silent = TRUE, useOptimizer = TRUE, suppressWarnings = TRUE) labels <- names(OpenMx::omxGetParameters(model)) eqids <- which(labels %in% global.constraints) neqids <- which(!labels %in% global.constraints) values <- OpenMx::omxGetParameters(run.global)[eqids] - model <- OpenMx::omxSetParameters(model, - labels=global.constraints,free=FALSE, values=values) + model <- OpenMx::omxSetParameters(model, + labels = global.constraints, free = FALSE, values = values + ) # FIX THIS LINE HERE - + # Read Global Constraints and New model Parameters Here. - ui_message("Global Constraints:\n",paste(global.constraints,collapse=" ")) - ui_message("Freely Estimated Parameters:\n",paste(names(OpenMx::omxGetParameters(model)),collapse=" ")) + ui_message("Global Constraints:\n", paste(global.constraints, collapse = " ")) + ui_message("Freely Estimated Parameters:\n", paste(names(OpenMx::omxGetParameters(model)), collapse = " ")) } - - + + # grow tree - if(control$sem.prog == 'OpenMx'){ - if (control$verbose){message('OpenMx model estimation selected!')} - } - else if(control$sem.prog == 'lavaan'){ - if (control$verbose){message('lavaan model estimation selected!')} - - } - else if(control$sem.prog == 'ctsem'){ - if (control$verbose){message('ctsem model estimation selected!')} - } - else { + if (control$sem.prog == "OpenMx") { + if (control$verbose) { + message("OpenMx model estimation selected!") + } + } else if (control$sem.prog == "lavaan") { + if (control$verbose) { + message("lavaan model estimation selected!") + } + } else if (control$sem.prog == "ctsem") { + if (control$verbose) { + message("ctsem model estimation selected!") + } + } else { stop("Unknown model type. Use OpenMx or lavaans models only!") } - - + + # save time before starting the actual tree growing start.time <- proc.time() - - # start the recursive growTree() function to do the + + # start the recursive growTree() function to do the # actual heavy lifting - tree <- growTree(model=model, mydata=dataset, control=control, - invariance=invariance, meta=meta, - constraints=constraints, ...) - - + tree <- growTree( + model = model, mydata = dataset, control = control, + invariance = invariance, meta = meta, + constraints = constraints, ... + ) + + # determine time elapsed - elapsed <- proc.time()-start.time - - + elapsed <- proc.time() - start.time + + # save various information in the result object and # assign it class 'semtree' tree$elapsed <- elapsed @@ -411,11 +480,12 @@ semtree <- function(model, data=NULL, control=NULL, constraints=NULL, tree$constraints <- constraints tree$version <- tryCatch(sessionInfo()$otherPkgs$semtree$Version) class(tree) <- "semtree" - + # tell the user that everything is OK - ui_ok("Tree construction finished [took ", - human_readable_time(elapsed[3]),"].") - + ui_ok( + "Tree construction finished [took ", + human_readable_time(elapsed[3]), "]." + ) + return(tree) - } diff --git a/R/semtree.control.R b/R/semtree.control.R index f8c1410..aac81c0 100644 --- a/R/semtree.control.R +++ b/R/semtree.control.R @@ -17,7 +17,7 @@ #' implements modern score-based statistics. #' #' -#' @aliases semtree.control print.semtree.control +#' @aliases semtree.control print.semtree.control semtree_control #' @param method Default: 'naive'. One out of #' \code{c("score","fair","naive")} for either an unbiased two-step #' selection algorithm, a naive take-the-best, or a @@ -65,7 +65,7 @@ #' correction for the naive test counts the number of dichotomous tests. When #' set to one, bonferroni correction counts the number of variables tested. #' @param missing Missing value treatment. Default is ignore -#' @param use.maxlm Use MaxLm statistic +#' @param use.maxlm Use MaxLR statistic for split point selection (as proposed by Arnold et al., 2021) #' @param strucchange.from Strucchange argument. See their package #' documentation. #' @param strucchange.to Strucchange argument. See their package documentation. @@ -81,6 +81,9 @@ #' @references Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, #' U. (2013). Structural equation model trees. \emph{Psychological Methods}, #' 18(1), 71-86. +#' @references +#' Arnold, M., Voelkle, M. C., & Brandmaier, A. M. (2021). Score-guided structural equation model trees. \emph{Frontiers in Psychology}, 11, Article 564403. https://doi.org/10.3389/fpsyg.2020.564403 + #' @examples #' #' @@ -96,8 +99,8 @@ #' #' @export semtree.control <- - function(method = "naive", - min.N = 20, + function(method = c("naive","score","fair","fair3"), + min.N = NULL, max.depth = NA, alpha = .05, alpha.invariance = NA, @@ -116,7 +119,7 @@ semtree.control <- # ordinal = 'maxLMo', # and maxLM are available # metric = 'maxLM'), linear = TRUE, - min.bucket = 10, + min.bucket = NULL, naive.bonferroni.type = 0, missing = 'ignore', use.maxlm = FALSE, @@ -143,7 +146,7 @@ semtree.control <- # minimum number of cases in leaf options$min.bucket <- min.bucket # method - options$method <- method + options$method <- match.arg(method) # maximal depth of the tree , set to NA for unrestricted trees options$max.depth <- max.depth # test invariance of strong restrictions @@ -196,3 +199,8 @@ semtree.control <- return(options) } + +#' @export +semtree_control <- function(...) { + semtree.control(...) +} \ No newline at end of file diff --git a/R/toLatex.R b/R/toLatex.R deleted file mode 100644 index f3b1240..0000000 --- a/R/toLatex.R +++ /dev/null @@ -1,3 +0,0 @@ -#toLatex <- function(object, ... ) UseMethod("toLatex") - -#toLatex.default <- utils::toLatex diff --git a/R/toLatex.semtree.R b/R/toLatex.semtree.R index 86cb856..f846209 100644 --- a/R/toLatex.semtree.R +++ b/R/toLatex.semtree.R @@ -1,3 +1,8 @@ +#toLatex <- function(object, ... ) UseMethod("toLatex") + +#toLatex.default <- utils::toLatex + +#' @exportS3Method toLatex semtree toLatex.semtree <- function(object, alternative.edge.labels=TRUE,root=NULL, prev_node=NULL, latex.mapping=NULL,parameter.names=NULL, stars=FALSE, linewidth=1, diff --git a/R/toTable.R b/R/toTable.R index 01b8e68..337c235 100644 --- a/R/toTable.R +++ b/R/toTable.R @@ -69,6 +69,11 @@ alls <- unique(alls) #covariate.names <-simplify2array(tree$result$btn.matrix[2,]) covariate.names <- getCovariatesFromTree(tree) +# default is to display all parameters +if (is.null(added.param.cols)) { + added.param.cols <- names(tree$params) +} + # all column names for the table to be generated (covariate names and parameter names) all.names <- c(covariate.names, added.param.cols) diff --git a/R/varimp.R b/R/varimp.R index 9e17be2..4d51ed0 100644 --- a/R/varimp.R +++ b/R/varimp.R @@ -115,11 +115,14 @@ varimp <- function(forest, colnames(result$importance.level1) <- var.names } + if (dim(result$importance)[1] == 1) { - #result$importance<-t(result$importance) + result$importance<-t(result$importance) + + # TODO: this is stupid, should be as.matrix?! or something else result$ll.baselines <- - t(t(result$ll.baselines)) # TODO: this is stupid, should be as.matrix?! - } + t(t(result$ll.baselines)) + } colnames(result$importance) <- var.names result$var.names <- var.names diff --git a/R/varimpFocus.R b/R/varimpFocus.R index 9486491..6d0f301 100644 --- a/R/varimpFocus.R +++ b/R/varimpFocus.R @@ -4,7 +4,7 @@ # # # -varimpFocus <- function(tree, data, cov.name, joint.model.list, constraints = NULL) +varimpFocus <- function(tree, data, cov.name, constraints = NULL) { has_constraints <- TRUE if (is.null(constraints)) { has_constraints <- FALSE } else { diff --git a/R/varimpTree.R b/R/varimpTree.R index 0a03d2a..932a20f 100644 --- a/R/varimpTree.R +++ b/R/varimpTree.R @@ -78,7 +78,6 @@ varimpTree <- function(tree, tree = tree, data = data, cov.name = cov.name, - joint.model.list, constraints = constraints ) } else { diff --git a/R/vcov_semtree.R b/R/vcov_semtree.R index f6d355e..ba86c44 100644 --- a/R/vcov_semtree.R +++ b/R/vcov_semtree.R @@ -10,7 +10,7 @@ vcov_semtree.lavaan <- function(x, ...) { if (x@Model@eq.constraints) { K <- eval(parse(text = "lavaan:::lav_constraints_R2K(x@Model)")) res <- solve(t(K) %*% lavaan::lavInspect(x, what = "information.expected") %*% K * - nobs(x)) + nobs(x)) } else { res <- x@vcov$vcov } @@ -18,32 +18,37 @@ vcov_semtree.lavaan <- function(x, ...) { } vcov_semtree.ctsemFit <- function(x, ...) { - ids <- which(colnames(x$mxobj$data$observed) %in% - grep(pattern = "^intervalID_T*", - x = colnames(x$mxobj$data$observed), - value = TRUE)) + grep( + pattern = "^intervalID_T*", + x = colnames(x$mxobj$data$observed), + value = TRUE + )) dat <- x$mxobj$data$observed[, -ids] - fit_untransformed <- ctsemOMX::ctFit(dat = dat, - ctmodelobj = x$ctmodelobj, - dataform = "wide", - stationary = x$ctfitargs$stationary, - fit = FALSE, - omxStartValues = coef.ctsemFit(x), - transformedParams = FALSE) + fit_untransformed <- ctsemOMX::ctFit( + dat = dat, + ctmodelobj = x$ctmodelobj, + dataform = "wide", + stationary = x$ctfitargs$stationary, + fit = FALSE, + omxStartValues = coef.ctsemFit(x), + transformedParams = FALSE + ) fit_untransformed <- OpenMx::mxModel( model = fit_untransformed$mxobj, OpenMx::mxComputeSequence(steps = list( - OpenMx::mxComputeNumericDeriv(checkGradient = FALSE, - hessian = TRUE, - analytic = FALSE) - ))) + OpenMx::mxComputeNumericDeriv( + checkGradient = FALSE, + hessian = TRUE, + analytic = FALSE + ) + )) + ) fit_untransformed <- OpenMx::mxRun(model = fit_untransformed, silent = TRUE) 2 * solve(fit_untransformed$output$calculatedHessian) - } diff --git a/README.Rmd b/README.Rmd index a2251d5..6896664 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,3 +1,8 @@ +--- +title: "Read Me" +output: md_document +--- + semtree ====== @@ -7,17 +12,21 @@ knitr::opts_chunk$set( collapse = TRUE ) ``` - + [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1116294.svg)](https://doi.org/10.5281/zenodo.1116294) [![cran version](http://www.r-pkg.org/badges/version/semtree)](https://cran.r-project.org/package=semtree) [![rstudio mirror downloads](http://cranlogs.r-pkg.org/badges/semtree)](https://github.com/r-hub/cranlogs.app) -[![Build Status](https://travis-ci.com/brandmaier/semtree.svg?branch=master)](https://travis-ci.com/brandmaier/semtree) +[![R-CMD-check](https://github.com/brandmaier/semtree/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/brandmaier/semtree/actions/workflows/R-CMD-check.yaml) +![Code size](https://img.shields.io/github/languages/code-size/brandmaier/semtree.svg) +![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/semtree) + +![contributions](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) ## What is this? -An R package for estimating Structural Equation Model Trees and Forests. +An R package for estimating Structural Equation Model (SEM) Trees and Forests. They are a fusion of SEM and decision trees, or SEM and random forests respectively. While SEM is a confirmatory modeling technique, SEM trees and forests allow to explore whether there are predictors that provide further information about an initial, theory-based model. Potential use cases are the search for potential predictors that explain individual differences, finding omitted variables in a model, or exploring measurement invariance over a large set of predictors. A recent overview is in our latest book chapter in the SEM handbook (Brandmaier & Jacobucci, 2023). ## Install @@ -29,7 +38,7 @@ install.packages("semtree") To install the latest semtree package directly from GitHub, copy the following line into R: ```{r, eval=FALSE} library(devtools) -devtools::install_github("semtree/brandmaier") +devtools::install_github("brandmaier/semtree") # even better: install with package vignette (extra documentation) devtools::install_github("brandmaier/semtree",force=TRUE, build_opts = c()) @@ -39,8 +48,6 @@ devtools::install_github("brandmaier/semtree",force=TRUE, build_opts = c()) Package documentation and use-cases with runnable R code can be found on our github pages: [https://brandmaier.github.io/semtree/](https://brandmaier.github.io/semtree/). -You may also want to visit the semtree website: [https://brandmaier.de/semtree](https://brandmaier.de/semtree) - Package vignettes (shipped with the package) contain documentation on how to use the package. Simply type this in R once you have loaded the package: ```{r eval=FALSE} @@ -53,15 +60,15 @@ Theory and method: - Brandmaier, A. M., & Jacobucci, R. C. (2023). Machine-learning approaches to structural equation modeling. In R. H. Hoyle (Ed.), Handbook of structural equation modeling (2nd rev. ed., pp. 722–739). Guilford Press. -- Arnold, M., Voelkle, M.C., and Brandmaier, A.M. (2021). Score-guided structural equation model trees. Frontiers in psychology 11, 564403. +- Arnold, M., Voelkle, M.C., and Brandmaier, A.M. (2021). Score-guided structural equation model trees. _Frontiers in psychology_, 11, 564403. - Brandmaier, A. M., Driver, C., & Voelkle, M. C. (2019). Recursive partitioning in continuous time analysis. In K. van Montfort, J. Oud, & M. C. Voelkle (Eds.), Continuous time modeling in the behavioral and related sciences. New York: Springer. -- Brandmaier, A. M., Prindle, J. J., McArdle, J. J., & Lindenberger, U. (2016). Theory-guided exploration with structural equation model forests. Psychological Methods, 21, 566-582. \doi{doi:10.1037/met0000090} +- Brandmaier, A. M., Prindle, J. J., McArdle, J. J., & Lindenberger, U. (2016). Theory-guided exploration with structural equation model forests. _Psychological Methods_, 21, 566-582. \doi{doi:10.1037/met0000090} - Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2014). Exploratory data mining with structural equation model trees. In J. J. McArdle & G. Ritschard (Eds.), Contemporary issues in exploratory data mining in the behavioral sciences (pp. 96-127). New York: Routledge. -- Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2013). Structural equation model trees. Psychological Methods, 18, 71-86. \doi{doi:10.1037/a0030001} +- Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2013). Structural equation model trees. _Psychological Methods_, 18, 71-86. \doi{doi:10.1037/a0030001} Applied examples (there are many more): diff --git a/README.md b/README.md index c4b198a..fb002cf 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,33 @@ # semtree + + [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1116294.svg)](https://doi.org/10.5281/zenodo.1116294) [![cran version](http://www.r-pkg.org/badges/version/semtree)](https://cran.r-project.org/package=semtree) [![rstudio mirror downloads](http://cranlogs.r-pkg.org/badges/semtree)](https://github.com/r-hub/cranlogs.app) -[![Build -Status](https://travis-ci.com/brandmaier/semtree.svg?branch=master)](https://travis-ci.com/brandmaier/semtree) +[![R-CMD-check](https://github.com/brandmaier/semtree/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/brandmaier/semtree/actions/workflows/R-CMD-check.yaml) +![Code +size](https://img.shields.io/github/languages/code-size/brandmaier/semtree.svg) +![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/semtree) + +![contributions](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) ## What is this? -An R package for estimating Structural Equation Model Trees and Forests. +An R package for estimating Structural Equation Model (SEM) Trees and +Forests. They are a fusion of SEM and decision trees, or SEM and random +forests respectively. While SEM is a confirmatory modeling technique, +SEM trees and forests allow to explore whether there are predictors that +provide further information about an initial, theory-based model. +Potential use cases are the search for potential predictors that explain +individual differences, finding omitted variables in a model, or +exploring measurement invariance over a large set of predictors. A +recent overview is in our latest book chapter in the SEM handbook +(Brandmaier & Jacobucci, 2023). ## Install @@ -24,7 +39,7 @@ To install the latest semtree package directly from GitHub, copy the following line into R: library(devtools) - devtools::install_github("semtree/brandmaier") + devtools::install_github("brandmaier/semtree") # even better: install with package vignette (extra documentation) devtools::install_github("brandmaier/semtree",force=TRUE, build_opts = c()) @@ -34,9 +49,6 @@ following line into R: Package documentation and use-cases with runnable R code can be found on our github pages: . -You may also want to visit the semtree website: - - Package vignettes (shipped with the package) contain documentation on how to use the package. Simply type this in R once you have loaded the package: @@ -53,7 +65,8 @@ Theory and method: pp. 722–739). Guilford Press. - Arnold, M., Voelkle, M.C., and Brandmaier, A.M. (2021). Score-guided - structural equation model trees. Frontiers in psychology 11, 564403. + structural equation model trees. *Frontiers in psychology*, 11, + 564403. - Brandmaier, A. M., Driver, C., & Voelkle, M. C. (2019). Recursive partitioning in continuous time analysis. In K. van Montfort, J. @@ -62,7 +75,7 @@ Theory and method: - Brandmaier, A. M., Prindle, J. J., McArdle, J. J., & Lindenberger, U. (2016). Theory-guided exploration with structural - equation model forests. Psychological Methods, 21, 566-582. + equation model forests. *Psychological Methods*, 21, 566-582. - Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2014). Exploratory data mining with structural @@ -72,7 +85,7 @@ Theory and method: - Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2013). Structural equation model trees. - Psychological Methods, 18, 71-86. + *Psychological Methods*, 18, 71-86. Applied examples (there are many more): diff --git a/docs/404.html b/docs/404.html index 7ccca1a..379ce99 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ semtree - 0.9.19 + 0.9.20 @@ -58,7 +58,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -112,7 +112,7 @@

    Page not found (404)

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/CONTRIBUTE.html b/docs/CONTRIBUTE.html index cb60979..bd16911 100644 --- a/docs/CONTRIBUTE.html +++ b/docs/CONTRIBUTE.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -76,11 +76,11 @@

    How to contribute?

    Some notes

    -
    • Please make sure that all tests pass before you create a pull request with a new feature or a bug fix, e.g., by running this in your local semtree directory:
    • -
    devtools::test(".")
    -
    • Please make sure that the package passes all checks that are required by CRAN before you create a pull request with a new feature or a bug fix, e.g., by running this in your local semtree directory:
    • -
    devtools::check(".")
    -
    • please use camel case for function names indicating the separation of words with a single capitalized letter, and the first word starting with lower case, e.g., checkBinSize() or getExpectedMean().
    • +
      • Please make sure that all tests pass before you create a pull request with a new feature or a bug fix, e.g., by running this in your local semtree directory: {r} devtools::test(".") +
      • +
      • Please make sure that the package passes all checks that are required by CRAN before you create a pull request with a new feature or a bug fix, e.g., by running this in your local semtree directory: {r} devtools::check(".") +
      • +
      • please use camel case for function names indicating the separation of words with a single capitalized letter, and the first word starting with lower case, e.g., checkBinSize() or getExpectedMean().
      @@ -100,7 +100,7 @@

      Some notes -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/CONTRIBUTORS.html b/docs/CONTRIBUTORS.html index 438712a..4fdcdb5 100644 --- a/docs/CONTRIBUTORS.html +++ b/docs/CONTRIBUTORS.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -88,7 +88,7 @@

      NA

      -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/LICENSE.html b/docs/LICENSE.html index b6fc3fe..8de4495 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -238,27 +238,27 @@

      17. Interpretation of Sectio

      How to Apply These Terms to Your New Programs

      If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

      To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the “copyright” line and a pointer to where the full notice is found.

      -
      <one line to give the program's name and a brief idea of what it does.>
      -Copyright (C) 2020 pdc
      -
      -This program is free software: you can redistribute it and/or modify
      -it under the terms of the GNU General Public License as published by
      -the Free Software Foundation, either version 3 of the License, or
      -(at your option) any later version.
      -
      -This program is distributed in the hope that it will be useful,
      -but WITHOUT ANY WARRANTY; without even the implied warranty of
      -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      -GNU General Public License for more details.
      -
      -You should have received a copy of the GNU General Public License
      -along with this program.  If not, see <http://www.gnu.org/licenses/>.
      +
      <one line to give the program's name and a brief idea of what it does.>
      +Copyright (C) 2020 pdc
      +
      +This program is free software: you can redistribute it and/or modify
      +it under the terms of the GNU General Public License as published by
      +the Free Software Foundation, either version 3 of the License, or
      +(at your option) any later version.
      +
      +This program is distributed in the hope that it will be useful,
      +but WITHOUT ANY WARRANTY; without even the implied warranty of
      +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      +GNU General Public License for more details.
      +
      +You should have received a copy of the GNU General Public License
      +along with this program.  If not, see <http://www.gnu.org/licenses/>.

      Also add information on how to contact you by electronic and paper mail.

      If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:

      -
      semtree Copyright (C) 2020 pdc
      -This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
      -This is free software, and you are welcome to redistribute it
      -under certain conditions; type 'show c' for details.
      +
      semtree Copyright (C) 2020 pdc
      +This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
      +This is free software, and you are welcome to redistribute it
      +under certain conditions; type 'show c' for details.

      The hypothetical commands show w and show c should show the appropriate parts of the General Public License. Of course, your program’s commands might be different; for a GUI interface, you would use an “about box”.

      You should also get your employer (if you work as a programmer) or school, if any, to sign a “copyright disclaimer” for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>.

      The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>.

      @@ -280,7 +280,7 @@

      How to Apply These Terms
      -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/articles/constraints.html b/docs/articles/constraints.html index e6fd4c4..7fa85f9 100644 --- a/docs/articles/constraints.html +++ b/docs/articles/constraints.html @@ -33,7 +33,7 @@ semtree - 0.9.19 + 0.9.20 @@ -59,7 +59,7 @@ Getting Started with the semtree package

    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -94,7 +94,7 @@

      Constraints in semtree

      Andreas M. Brandmaier

      -

      2023-04-06

      +

      2024-04-15

      Source: vignettes/constraints.Rmd @@ -197,52 +197,64 @@

      Global Invariance
      tree.gc <- semtree(model.cfa, data=cfa.sim, constraints=
      -                   semtree.constraints(global.invariance = 
      -                                         c("F__x1","F__x2","F__x3","F__x4")))
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1296.86314066279, new current best! (was 23607.7613598408)
      -                                                                             
      -> Global Constraints:
      -#> F__x1 F__x2 F__x3 F__x4
      -#> > Freely Estimated Parameters:
      -#> VAR_x1 VAR_x2 VAR_x3 VAR_x4 const__x2 const__x3 const__x4 const__F
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1296.8631406626, new current best! (was 1296.86314066277)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=811.249045946747, new current best! (was 1071.01945896895)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=125.670448647526, new current best! (was 411.308446842878)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=97.7311535251429, new current best! (was 399.94059910378)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=-67.3504247916599, new current best! (was 225.843681693765)
      -Beginning fit attempt 1 of at maximum 10 extra tries                          
      -Beginning fit attempt 2 of at maximum 10 extra tries
      -Beginning fit attempt 3 of at maximum 10 extra tries
      -Beginning fit attempt 4 of at maximum 10 extra tries
      -Fit attempt 4, fit=-67.350424791684, new current best! (was -67.3504247916599)
      -Beginning fit attempt 5 of at maximum 10 extra tries                          
      -Fit attempt 5, fit=-67.3504247916867, new current best! (was -67.350424791684)
      -Beginning fit attempt 6 of at maximum 10 extra tries                          
      -Fit attempt 6, fit=-67.3504247917372, new current best! (was -67.3504247916867)
      -Beginning fit attempt 7 of at maximum 10 extra tries                           
      -Beginning fit attempt 8 of at maximum 10 extra tries
      -Beginning fit attempt 9 of at maximum 10 extra tries
      -Beginning fit attempt 10 of at maximum 10 extra tries
      -                                                     
      -
[32m✔
[39m Tree construction finished [took 2s].
      +
      tree.gc <- semtree(model.cfa, data=cfa.sim, constraints=
      +                   semtree.constraints(global.invariance = 
      +                                         c("F__x1","F__x2","F__x3","F__x4")))
      +#> ❯ Model was not run. Estimating parameters now.
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1245.04605304561, new current best! (was 23512.9380282892)
      +                                                                             
      +❯ Global Constraints:
      +#> F__x1 F__x2 F__x3 F__x4
      +#> ❯ Freely Estimated Parameters:
      +#> VAR_x1 VAR_x2 VAR_x3 VAR_x4 const__x2 const__x3 const__x4 const__F
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1245.0460530455, new current best! (was 1245.04605304558)
      +                                                                            
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=700.981679486895, new current best! (was 935.197549062569)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=82.3725219274684, new current best! (was 404.027616923408)
      +Beginning fit attempt 1 of at maximum 10 extra tries                         
      +Fit attempt 1, fit=82.3725219274052, new current best! (was 82.3725219274684)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=-12.8178155443534, new current best! (was 296.954062563265)
      +Beginning fit attempt 1 of at maximum 10 extra tries                          
      +Fit attempt 1, fit=-12.8178155443725, new current best! (was -12.8178155443534)
      +Beginning fit attempt 2 of at maximum 10 extra tries                           
      +Beginning fit attempt 3 of at maximum 10 extra tries
      +Beginning fit attempt 4 of at maximum 10 extra tries
      +Beginning fit attempt 5 of at maximum 10 extra tries
      +Beginning fit attempt 6 of at maximum 10 extra tries
      +Beginning fit attempt 7 of at maximum 10 extra tries
      +Beginning fit attempt 8 of at maximum 10 extra tries
      +Beginning fit attempt 9 of at maximum 10 extra tries
      +Beginning fit attempt 10 of at maximum 10 extra tries
      +                                                     
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=41.8996556241436, new current best! (was 309.848503983403)
      +Beginning fit attempt 1 of at maximum 10 extra tries                         
      +Fit attempt 1, fit=41.8996556240704, new current best! (was 41.8996556241436)
      +Beginning fit attempt 2 of at maximum 10 extra tries                         
      +Fit attempt 2, fit=41.8996556240695, new current best! (was 41.8996556240704)
      +Beginning fit attempt 3 of at maximum 10 extra tries                         
      +Beginning fit attempt 4 of at maximum 10 extra tries
      +Beginning fit attempt 5 of at maximum 10 extra tries
      +Beginning fit attempt 6 of at maximum 10 extra tries
      +Beginning fit attempt 7 of at maximum 10 extra tries
      +Beginning fit attempt 8 of at maximum 10 extra tries
      +Beginning fit attempt 9 of at maximum 10 extra tries
      +Beginning fit attempt 10 of at maximum 10 extra tries
      +                                                     
      +
[32m✔
[39m Tree construction finished [took less than a second].
       plot(tree.gc)

      @@ -262,35 +274,44 @@

      Local Invariancelocal.invariance to allow a tree with weakly measurement-invariant leafs.

      -
      tree.lc <- semtree(model.cfa, data=cfa.sim, constraints=
      -                   semtree.constraints(
      -                     local.invariance= c("F__x1","F__x2","F__x3","F__x4")))
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1296.86314066279, new current best! (was 23607.7613598408)
      -                                                                             
      -> No Invariance alpha selected. alpha.invariance set to:0.05
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1296.86314066257, new current best! (was 1296.86314066279)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=796.298827273366, new current best! (was 1071.01945865405)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=-115.56317117639, new current best! (was 225.84368200868)
      -Beginning fit attempt 1 of at maximum 10 extra tries                        
      -Fit attempt 1, fit=-115.56317117659, new current best! (was -115.56317117639)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 2s].
      +
      tree.lc <- semtree(model.cfa, data=cfa.sim, constraints=
      +                   semtree.constraints(
      +                     local.invariance= c("F__x1","F__x2","F__x3","F__x4")))
      +#> ❯ Model was not run. Estimating parameters now.
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1245.04605304561, new current best! (was 23512.9380282892)
      +                                                                             
      +❯ No Invariance alpha selected. alpha.invariance set to:0.05
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1245.04605304549, new current best! (was 1245.04605304561)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=674.376601044165, new current best! (was 935.197546229974)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=14.8502289550343, new current best! (was 309.84850681597)
      +Beginning fit attempt 1 of at maximum 10 extra tries                        
      +Fit attempt 1, fit=14.8502289550283, new current best! (was 14.8502289550343)
      +Beginning fit attempt 2 of at maximum 10 extra tries                         
      +Beginning fit attempt 3 of at maximum 10 extra tries
      +Beginning fit attempt 4 of at maximum 10 extra tries
      +Beginning fit attempt 5 of at maximum 10 extra tries
      +Beginning fit attempt 6 of at maximum 10 extra tries
      +Beginning fit attempt 7 of at maximum 10 extra tries
      +Beginning fit attempt 8 of at maximum 10 extra tries
      +Beginning fit attempt 9 of at maximum 10 extra tries
      +Beginning fit attempt 10 of at maximum 10 extra tries
      +                                                     
      +
[32m✔
[39m Tree construction finished [took 1s].

      Now we find p1 as the only predictor that yields subgroups that pass the measurement invariance test. Even though we have chosen the four factor loadings as local.invariance @@ -352,11 +373,11 @@

      Focus Parameters#> #> free parameters: #> name matrix row col Estimate Std.Error A -#> 1 VAR_x1 S x1 x1 4.0283800 0.18015521 -#> 2 COV_x1_x2 S x1 x2 0.3039196 0.11444978 -#> 3 VAR_x2 S x2 x2 3.2282345 0.14437113 -#> 4 mu1 M 1 x1 1.4187341 0.06346967 -#> 5 mu2 M 1 x2 1.4628999 0.05681795 +#> 1 VAR_x1 S x1 x1 4.0283800 0.18015309 +#> 2 COV_x1_x2 S x1 x2 0.3039196 0.11443882 +#> 3 VAR_x2 S x2 x2 3.2282345 0.14437055 +#> 4 mu1 M 1 x1 1.4187341 0.06346921 +#> 5 mu2 M 1 x2 1.4628999 0.05681732 #> #> Model Statistics: #> | Parameters | Degrees of Freedom | Fit (-2lnL units) @@ -374,48 +395,48 @@

      Focus Parameters#> RMSEA: 0 [95% CI (NA, NA)] #> Prob(RMSEA <= 0.05): NA #> To get additional fit indices, see help(mxRefModels) -#> timestamp: 2023-04-06 12:46:50 -#> Wall clock time: 0.01908207 secs +#> timestamp: 2024-04-15 22:32:57 +#> Wall clock time: 0.009307146 secs #> optimizer: SLSQP #> OpenMx version number: 2.21.1 #> Need help? See help(mxSummary)

      Now, we grow a tree without constraints:

      -
      
      -tree.biv <- semtree(model.biv, data=df.biv)
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531949563)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1555.54412300078, new current best! (was 1720.05414322814)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1569.26472590267, new current best! (was 1734.07020313343)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.0405063558)
      -                                                                           
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1593.91684303245, new current best! (was 1780.60715330577)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1576.27862642528, new current best! (was 1785.96205470403)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 2s].
      +
      
      +tree.biv <- semtree(model.biv, data=df.biv)
      +#> ❯ Model was not run. Estimating parameters now.
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531930229)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1555.54412300078, new current best! (was 1720.05414323192)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1569.26472590267, new current best! (was 1734.07020312965)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.04050654914)
      +                                                                            
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1593.91684303245, new current best! (was 1780.60715331027)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=1576.27862642528, new current best! (was 1785.96205469953)
      +                                                                             
      +
[32m✔
[39m Tree construction finished [took less than a second].

      As expected, we obtain a tree structure that has both p1 and p2 (here we use the viridis colors to give each leaf node a different frame color, which we’ll use later again):

      @@ -452,27 +473,27 @@

      Focus Parametersfocus.parameter.

      Let us first set mu1 as focus parameter:

      -
      
      -tree.biv2 <- semtree(model.biv, df.biv, constraints=
      -                      semtree.constraints(focus.parameters = "mu1"))
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3740.92185296731, new current best! (was 4086.36288876948)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3795.16307144921, new current best! (was 4147.56293708195)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 1s].
      +
      
      +tree.biv2 <- semtree(model.biv, df.biv, constraints=
      +                      semtree.constraints(focus.parameters = "mu1"))
      +#> ❯ Model was not run. Estimating parameters now.
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=3740.92185296731, new current best! (was 4086.36288893237)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=3795.16307144921, new current best! (was 4147.56293691906)
      +                                                                             
      +
[32m✔
[39m Tree construction finished [took less than a second].
       plot(tree.biv2)

      @@ -481,27 +502,27 @@

      Focus Parametersmu1. Predictor grp2 did not come up anymore. Now, if we set mu2, we should see the exact opposite picture:

      -
      
      -tree.biv3 <- semtree(model.biv, df.biv, constraints=
      -                      semtree.constraints(focus.parameters = "mu2"))
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531949563)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.0405063558)
      -                                                                           
      -
[32m✔
[39m Tree construction finished [took 1s].
      +
      
      +tree.biv3 <- semtree(model.biv, df.biv, constraints=
      +                      semtree.constraints(focus.parameters = "mu2"))
      +#> ❯ Model was not run. Estimating parameters now.
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=3454.12434636158, new current best! (was 4066.88531930229)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=3566.5692080098, new current best! (was 4167.04050654914)
      +                                                                            
      +
[32m✔
[39m Tree construction finished [took less than a second].

      And, indeed, we see only grp2 as predictor whereas grp1 was not selected this time.

      @@ -511,21 +532,21 @@ 

      Focus Parameters -
      
      -tree.biv4 <- semtree(model.biv, df.biv, constraints=
      -                      semtree.constraints(focus.parameters = "VAR_x2"))
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took less than a second].
      -
      -plot(tree.biv4)
      +
      
      +tree.biv4 <- semtree(model.biv, df.biv, constraints=
      +                      semtree.constraints(focus.parameters = "VAR_x2"))
      +#> ❯ Model was not run. Estimating parameters now.
      +#> 
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585158, new current best! (was 14528.4141425595)
      +                                                                             
      +
      +Beginning initial fit attempt
      +Fit attempt 0, fit=8233.92582585143, new current best! (was 8233.92582585158)
      +                                                                             
      +
[32m✔
[39m Tree construction finished [took less than a second].
      +
      +plot(tree.biv4)

      @@ -548,7 +569,7 @@

      Focus Parameters

      -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/articles/constraints_files/figure-html/plbv2-1.png b/docs/articles/constraints_files/figure-html/plbv2-1.png index 9f97b31..2441608 100644 Binary files a/docs/articles/constraints_files/figure-html/plbv2-1.png and b/docs/articles/constraints_files/figure-html/plbv2-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png index aaf7f14..ad1756f 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png index 9b4d438..e3d44d2 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png index 10cdbd1..7585b2c 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png index 26296be..88d33d5 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png index fea4170..41ba7dc 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-15-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png index 0b2fcc1..66505ec 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png index 4a33c18..2a78766 100644 Binary files a/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/constraints_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/forests.html b/docs/articles/forests.html index a6cd0ae..a3063da 100644 --- a/docs/articles/forests.html +++ b/docs/articles/forests.html @@ -33,7 +33,7 @@ semtree - 0.9.19 + 0.9.20 @@ -59,7 +59,7 @@ Getting Started with the semtree package

    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -94,7 +94,7 @@

      SEM Forests

      Andreas Brandmaier

      -

      2023-04-06

      +

      2024-04-15

      Source: vignettes/forests.Rmd @@ -103,6 +103,12 @@

      2023-04-06

      +

      This example demonstrates how SEM forests can be grown. SEM forests +are ensembles of typically hundreds to thousands of SEM trees. Using +permutation-based variable importance estimates, we can aggregate the +importance of each predictor for improving model fit.

      +

      Here, we use the affect dataset and a simple SEM with +only a single observed variable and no latent variables.

      Load data

      @@ -353,8 +359,8 @@

      Create simple model of state anxie #> RMSEA: 0 [95% CI (NA, NA)] #> Prob(RMSEA <= 0.05): NA #> To get additional fit indices, see help(mxRefModels) -#> timestamp: 2023-04-06 12:47:04 -#> Wall clock time: 0.064399 secs +#> timestamp: 2024-04-15 22:33:05 +#> Wall clock time: 0.02428603 secs #> optimizer: SLSQP #> OpenMx version number: 2.21.1 #> Need help? See help(mxSummary)

      @@ -363,10 +369,12 @@

      Create simple model of state anxie

      Forest

      Create a forest control object that stores all tuning parameters of -the forest. Note that we use only 5 trees for demo purposes. Please -increase the number in real applications.

      +the forest. Note that we use only 5 trees for illustration. Please +increase the number in real applications to several hundreds. To speed +up computation time, consider score-based test for variable selection in +the trees.

      -control <- semforest.control(num.trees = 5)
      +control <- semforest_control(num.trees = 5)
       print(control)
       #> SEM-Forest control:
       #> -----------------
      @@ -389,243 +397,26 @@ 

      Forest #> ● Progress Bar: TRUE #> ● Seed: NA

      Now, run the forest using the control object:

      -
      forest <- semforest( model=model,
      -                     data = affect, 
      -                     control = control,
      -                     covariates = c("Study","Film", "state1",
      -                                    "PA2","NA2","TA2"))
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=1289.15758570645, new current best! (was 1387.78413290756)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=818.25448472129, new current best! (was 819.184832331613)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=540.657188936563, new current best! (was 554.537456022498)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=540.657188936539, new current best! (was 540.657188936563)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=253.262932464789, new current best! (was 265.845563494021)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=267.917686855653, new current best! (was 274.811625442518)
      -                                                                             
      -
      -Beginning initial fit attempt
      -                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=230.436210492665, new current best! (was 263.717028698793)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=91.8058189525733, new current best! (was 95.4201985173459)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=130.239485916693, new current best! (was 135.016011975319)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 11s].
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=788.643601362753, new current best! (was 789.364982717159)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=356.921622008996, new current best! (was 375.924543648624)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=274.070041829913, new current best! (was 275.659388744346)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=76.224093068116, new current best! (was 81.2622332646489)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=399.837330598193, new current best! (was 412.719057714128)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=97.1496598369175, new current best! (was 116.06664283684)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=272.800917814333, new current best! (was 283.770687761354)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=123.620225509635, new current best! (was 126.437967074978)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=144.858996740466, new current best! (was 146.362950739354)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=54.3867250107566, new current best! (was 63.3116865213297)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=77.7820652871679, new current best! (was 81.5473102191363)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 10s].
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=834.887957588376, new current best! (was 835.097735354963)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=484.442995019425, new current best! (was 504.824969995514)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=327.747061041181, new current best! (was 335.605482887242)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=226.487492378292, new current best! (was 232.62717028389)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=122.51145092926, new current best! (was 124.519247808258)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=99.0044984589434, new current best! (was 101.968244570034)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=82.7340026651454, new current best! (was 95.1198907572906)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=132.40038301966, new current best! (was 148.837512132183)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=305.43653582216, new current best! (was 330.062987592862)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=167.668643402405, new current best! (was 170.078100401807)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=131.357409966922, new current best! (was 135.358435420353)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 9s].
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=848.46463702424, new current best! (was 848.840697274375)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=328.187465026306, new current best! (was 366.760803980649)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=150.789034618148, new current best! (was 163.442241865474)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=152.956425048368, new current best! (was 164.745223160832)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=457.506356717906, new current best! (was 481.703833043592)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=321.91996897439, new current best! (was 326.091979281769)
      -                                                                            
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=158.055244073882, new current best! (was 164.875520523358)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=149.539343979152, new current best! (was 157.044448451032)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=73.5643765034496, new current best! (was 76.0629347964262)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=72.0937976825281, new current best! (was 73.4764091827261)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=118.046439931572, new current best! (was 131.414377436137)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 9s].
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=816.768764245928, new current best! (was 818.827214348166)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=383.803628624992, new current best! (was 395.149459331774)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=185.014060679185, new current best! (was 198.367577946491)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=171.297462750686, new current best! (was 185.436050678501)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=415.322910009232, new current best! (was 421.619304914154)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=268.217878591544, new current best! (was 284.494189283796)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=80.8254886521707, new current best! (was 86.5883323542989)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=178.831836528103, new current best! (was 181.629546237244)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=88.0410808835288, new current best! (was 97.6984858211589)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=75.6021407498517, new current best! (was 81.1333507069438)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=111.301359647879, new current best! (was 130.828720725437)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took 12s].
      -#> 
[32m✔
[39m Forest completed [took 50s]
      +
      +forest <- semforest( model=model,
      +                     data = affect, 
      +                     control = control,
      +                     covariates = c("Study","Film", "state1",
      +                                    "PA2","NA2","TA2"))

      Variable importance

      -

      Next, we compute permutation-based variable importance.

      +

      Next, we compute permutation-based variable importance. This may take +some time.

       vim <- varimp(forest)
       print(vim, sort.values=TRUE)
       #> Variable Importance
      -#>       Film      Study     state1        PA2        TA2        NA2 
      -#> -0.3848008  2.0517864  9.8412123 25.5626097 31.7029700 84.7036831
      +#>         Study           PA2        state1           TA2          Film 
      +#> -9.659311e-08  9.418006e+00  1.218599e+01  2.066494e+01  2.537268e+01 
      +#>           NA2 
      +#>  3.658740e+01
       plot(vim)

      From this, we can learn that variables such as NA2 @@ -654,7 +445,7 @@

      Variable importance

      -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png index 64dfca4..8d75b91 100644 Binary files a/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/forests_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/getting-started.html b/docs/articles/getting-started.html index 89300f6..b2b6bf1 100644 --- a/docs/articles/getting-started.html +++ b/docs/articles/getting-started.html @@ -33,7 +33,7 @@ semtree - 0.9.19 + 0.9.20 @@ -59,7 +59,7 @@ Getting Started with the semtree package
    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -103,14 +103,23 @@

      Getting Started with the semtree package

      Load the Package

      +

      We first load the semtree package and the +OpenMx package for specifying our SEM.

       library(semtree)
       #> Loading required package: OpenMx
      +#> OpenMx may run faster if it is compiled to take advantage of multiple cores.
       library(OpenMx)

      Simulate data

      +

      Now, we simulate some data from a linear latent growth curve model +(that is, a random intercept and random slope over time). The dataset +will be called growth.data. The dataset contains five +observations for each individual (X1 to X5) +and one predictor P1. The predictor is dichotomous and +predicts a (quite large) difference in mean slope.

       set.seed(23)
       N <- 1000
      @@ -123,12 +132,15 @@ 

      Simulate data(slope + p1 * 5) %*% t(loadings) + matrix(rep(icept, each = M), byrow = TRUE, ncol = M) + rnorm(N * M, sd = .08) -growth.data <- data.frame(x, p1) +growth.data <- data.frame(x, factor(p1)) names(growth.data) <- c(paste0("X", 1:M), "P1")

      Specify an OpenMx model

      +

      Now, we specify a linear latent growth curve model using OpenMx’s +path specification. The model has five observed variables. Residual +variances are assumed to be identical over time.

       manifests <- names(growth.data)[1:5]
       growthCurveModel <- mxModel("Linear Growth Curve Model Path Specification",
      @@ -186,35 +198,30 @@ 

      Specify an OpenMx model values=c(1, 1), labels=c("meani", "means") ) -) # close model

      +) # close model + +# fit the model to the entire dataset +growthCurveModel <- mxRun(growthCurveModel) +#> Running Linear Growth Curve Model Path Specification with 6 parameters +#> Warning: In model 'Linear Growth Curve Model Path Specification' Optimizer +#> returned a non-zero status code 5. The Hessian at the solution does not appear +#> to be convex. See ?mxCheckIdentification for possible diagnosis (Mx status RED).

      Run a tree

      -
      tree <- semtree(model = growthCurveModel, data = growth.data)
      -#> > Model was not run. Estimating parameters now.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=7638.98979615046, new current best! (was 84974.7019994723)
      -                                                                             
      -
[33m✖
[39m Variable P1 is numeric but has only few unique values. Consider recoding as ordered factor.
      -#> 
      -Beginning initial fit attempt
      -Fit attempt 0, fit=7638.98979614189, new current best! (was 7638.98979615046)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=2780.73090202519, new current best! (was 3625.89178697863)
      -                                                                             
      -
      -Beginning initial fit attempt
      -Fit attempt 0, fit=3175.11831426993, new current best! (was 4013.09800926999)
      -                                                                             
      -
[32m✔
[39m Tree construction finished [took less than a second].
      +

      Now, we grow a SEM tree using the semtree function, +which takes the model and the dataset as input. If not specified +otherwise, SEM tree will assume that all variables in the dataset, which +are not observed variables in the dataset are potential predictors.

      +
      +tree <- semtree(model = growthCurveModel, 
      +                data = growth.data)

      Plotting

      +

      Once the tree is grown, we can plot it:

       plot(tree)

      @@ -238,7 +245,7 @@

      Plotting

      -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/articles/getting-started_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/getting-started_files/figure-html/unnamed-chunk-4-1.png index 7dfbbe4..f3cdf87 100644 Binary files a/docs/articles/getting-started_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/getting-started_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 72b385c..3d99133 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -78,7 +78,7 @@

      All vignettes

      Getting Started with the semtree package
      -
      Score-based Tests
      +
      SEM Trees with score-based tests
      Focus parameters in SEM forests
      @@ -92,7 +92,7 @@

      All vignettes

      -

      Site built with pkgdown 2.0.7.

      +

      Site built with pkgdown 2.0.6.

      diff --git a/docs/articles/score-based-tests.html b/docs/articles/score-based-tests.html index b7923c0..cc5b0ef 100644 --- a/docs/articles/score-based-tests.html +++ b/docs/articles/score-based-tests.html @@ -5,13 +5,13 @@ -Score-based Tests • semtree +SEM Trees with score-based tests • semtree - + NA • semtree + + +
      +
      + + + +
      +
      + + + +

      Dear CRAN team,

      +

      this is a maintenance update with a couple of bug fixes users reported over the last year.

      +

      Thanks & best wishes, Andreas

      + + +
      + + + +
      + + + +
      + +
      +

      Site built with pkgdown 2.0.7.

      +
      + +
      + + + + + + + + diff --git a/docs/index.html b/docs/index.html index 7eca384..ba3d0a9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ semtree - 0.9.19 + 0.9.20 @@ -68,7 +68,7 @@ Getting Started with the semtree package
    • - Score-based Tests + SEM Trees with score-based tests
    • Focus parameters in SEM forests @@ -101,31 +101,31 @@
      -

      DOI cran version rstudio mirror downloads Build Status License: GPL v3

      + +

      What is this?

      -

      An R package for estimating Structural Equation Model Trees and Forests.

      +

      An R package for estimating Structural Equation Model (SEM) Trees and Forests. They are a fusion of SEM and decision trees, or SEM and random forests respectively. While SEM is a confirmatory modeling technique, SEM trees and forests allow to explore whether there are predictors that provide further information about an initial, theory-based model. Potential use cases are the search for potential predictors that explain individual differences, finding omitted variables in a model, or exploring measurement invariance over a large set of predictors. A recent overview is in our latest book chapter in the SEM handbook (Brandmaier & Jacobucci, 2023).

      Install

      Install the latest stable version from CRAN:

      -
      install.packages("semtree")
      +
      install.packages("semtree")

      To install the latest semtree package directly from GitHub, copy the following line into R:

      -
      library(devtools)
      -devtools::install_github("semtree/brandmaier")
      -
      -# even better: install with package vignette (extra documentation)
      -devtools::install_github("brandmaier/semtree",force=TRUE, build_opts = c())
      +
      library(devtools)
      +devtools::install_github("brandmaier/semtree")
      +
      +# even better: install with package vignette (extra documentation)
      +devtools::install_github("brandmaier/semtree",force=TRUE, build_opts = c())

      Usage

      Package documentation and use-cases with runnable R code can be found on our github pages: https://brandmaier.github.io/semtree/.

      -

      You may also want to visit the semtree website: https://brandmaier.de/semtree

      Package vignettes (shipped with the package) contain documentation on how to use the package. Simply type this in R once you have loaded the package:

      -
      browseVignettes("semtree")
      +
      browseVignettes("semtree")

      References @@ -133,11 +133,15 @@

      References
    • +
    • +

    • Brandmaier, A. M., Driver, C., & Voelkle, M. C. (2019). Recursive partitioning in continuous time analysis. In K. van Montfort, J. Oud, & M. C. Voelkle (Eds.), Continuous time modeling in the behavioral and related sciences. New York: Springer.

    • -
    • Brandmaier, A. M., Prindle, J. J., McArdle, J. J., & Lindenberger, U. (2016). Theory-guided exploration with structural equation model forests. Psychological Methods, 21, 566-582.

    • +
    • Brandmaier, A. M., Prindle, J. J., McArdle, J. J., & Lindenberger, U. (2016). Theory-guided exploration with structural equation model forests. Psychological Methods, 21, 566-582.

    • Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2014). Exploratory data mining with structural equation model trees. In J. J. McArdle & G. Ritschard (Eds.), Contemporary issues in exploratory data mining in the behavioral sciences (pp. 96-127). New York: Routledge.

    • -
    • Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2013). Structural equation model trees. Psychological Methods, 18, 71-86.

    • +
    • Brandmaier, A. M., von Oertzen, T., McArdle, J. J., & Lindenberger, U. (2013). Structural equation model trees. Psychological Methods, 18, 71-86.

    Applied examples (there are many more):

    Brandmaier, A. M., Ram, N., Wagner, G. G., & Gerstorf, D. (2017). Terminal decline in well-being: The role of multi-indicator constellations of physical health and psychosocial correlates. Developmental Psychology.

    @@ -181,7 +185,16 @@

    Developers

    - +
    +

    Dev status

    +
      +
    • DOI
    • +
    • cran version
    • +
    • rstudio mirror downloads
    • +
    • R-CMD-check
    • +
    • License: GPL v3
    • +
    +
    @@ -194,7 +207,7 @@

    Developers

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/news/index.html b/docs/news/index.html index 41ac0ef..602ef20 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -70,13 +70,23 @@

    Changelog

    - -
    • changed default behavior of print function of varimp, such that na.omit=TRUE, which is consistent with other packages like party or partykit
    • + +
      • added an error handler for score-based tests when the vcov matrix cannot be computed (e.g., models with Heywood cases)
      • +
      • leaner package imports: removed dependency on bitops and stringr package
      • +
      • prefer semforest_control() over semforest.control() and semtree_control() over semtree.control() +
      • +
      • added heuristics for choosing mtry in forests (if NULL) and for choosing min.N and min.bucket (if NULL)
      • +
      • moved dependency on ctsemOMX to suggested package
    - -
    • bugfix in score-based tests that sometimes did not respect min.N constraints
    • + +
      • changed default behavior of print function of varimp, such that na.omit=TRUE, which is consistent with other packages like party or partykit
      • +
      • fixed issues with toTable()-command, by default, all parameters are shown now, also fixed a bug with score-based tests and toTable()
      • +
      • fixed problem with focus-parameters and variable importance
      • +
      • bugfix in score-based tests that sometimes did not respect min.N constraints
      • new functionality for parameter contribution evaluation
      • +
      • more verbose vignettes
      • +
      • removed dependency on set, plotrix and digest package to make package imports leaner
    @@ -135,7 +145,7 @@
    @@ -153,7 +163,7 @@
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -106,7 +106,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/boruta.html b/docs/reference/boruta.html new file mode 100644 index 0000000..49aaa97 --- /dev/null +++ b/docs/reference/boruta.html @@ -0,0 +1,144 @@ + +BORUTA algorithm for SEM trees — boruta • semtree + + +
    +
    + + + +
    +
    + + +
    +

    BORUTA algorithm for SEM trees

    +
    + +
    +
    boruta(
    +  model,
    +  data,
    +  control = NULL,
    +  predictors = NULL,
    +  percentile_threshold = 100,
    +  rounds = 1,
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    model
    +

    A template model specification from OpenMx using +the mxModel function (or a lavaan model +using the lavaan function with option fit=FALSE). +Model must be syntactically correct within the framework chosen, and +converge to a solution.

    + + +
    data
    +

    Data.frame used in the model creation using +mxModel or lavaan are input here. Order +of modeled variables and predictors is not important when providing a +dataset to semtree.

    + + +
    control
    +

    semtree model specifications from +semtree.control are input here. Any changes from the default +setting can be specified here.

    + + +
    percentile_threshold
    +

    Numeric.

    + + +
    rounds
    +

    Numeric. Number of rounds of the BORUTA algorithm.

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.6.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/coef.semtree.html b/docs/reference/coef.semtree.html index 74e47e4..23fa2ba 100644 --- a/docs/reference/coef.semtree.html +++ b/docs/reference/coef.semtree.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -103,7 +103,7 @@

    Arguments

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/computePval_maxLR.html b/docs/reference/computePval_maxLR.html index b4cc447..a287623 100644 --- a/docs/reference/computePval_maxLR.html +++ b/docs/reference/computePval_maxLR.html @@ -20,7 +20,7 @@ semtree - 0.9.19 + 0.9.20 @@ -44,7 +44,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -101,7 +101,7 @@

    Arguments

    from

    numeric from interval (0, 1) specifying start of trimmed -sample period. With the default +sample period. With the default from = 0.15 the first and last 15 percent of observations are trimmed. This is only needed for continuous covariates.

    @@ -112,7 +112,7 @@

    Arguments

    nrep
    -

    numeric. Number of replications used for simulating from the asymptotic +

    numeric. Number of replications used for simulating from the asymptotic distribution (passed to efpFunctional). Only needed for ordinal covariates.

    @@ -140,7 +140,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/diversityMatrix.html b/docs/reference/diversityMatrix.html index 4ed38ec..5b6a210 100644 --- a/docs/reference/diversityMatrix.html +++ b/docs/reference/diversityMatrix.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -105,7 +105,7 @@

    Arguments

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/evaluate.html b/docs/reference/evaluate.html index 92c9e36..9f24fa1 100644 --- a/docs/reference/evaluate.html +++ b/docs/reference/evaluate.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -126,7 +126,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/evaluateDataLikelihood.html b/docs/reference/evaluateDataLikelihood.html index 68cee8c..12dc6fa 100644 --- a/docs/reference/evaluateDataLikelihood.html +++ b/docs/reference/evaluateDataLikelihood.html @@ -19,7 +19,7 @@ semtree - 0.9.19 + 0.9.20 @@ -43,7 +43,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -130,7 +130,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/evaluateTree.html b/docs/reference/evaluateTree.html index cd841ee..e161d5f 100644 --- a/docs/reference/evaluateTree.html +++ b/docs/reference/evaluateTree.html @@ -20,7 +20,7 @@ semtree - 0.9.19 + 0.9.20 @@ -44,7 +44,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -147,7 +147,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/findOtherSplits.html b/docs/reference/findOtherSplits.html index 7b2bd66..0a96e01 100644 --- a/docs/reference/findOtherSplits.html +++ b/docs/reference/findOtherSplits.html @@ -20,7 +20,7 @@ semtree - 0.9.19 + 0.9.20 @@ -44,7 +44,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -127,7 +127,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/fitSubmodels.html b/docs/reference/fitSubmodels.html index aa9d27f..baa33df 100644 --- a/docs/reference/fitSubmodels.html +++ b/docs/reference/fitSubmodels.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -125,7 +125,7 @@

    Arguments

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getDepth.html b/docs/reference/getDepth.html index 30bde5d..d8f49f1 100644 --- a/docs/reference/getDepth.html +++ b/docs/reference/getDepth.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -107,7 +107,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getHeight.html b/docs/reference/getHeight.html index aecf3ad..4164647 100644 --- a/docs/reference/getHeight.html +++ b/docs/reference/getHeight.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -109,7 +109,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getLeafs.html b/docs/reference/getLeafs.html index 867035d..05ffd09 100644 --- a/docs/reference/getLeafs.html +++ b/docs/reference/getLeafs.html @@ -20,7 +20,7 @@ semtree - 0.9.19 + 0.9.20 @@ -44,7 +44,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -117,7 +117,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getNodeById.html b/docs/reference/getNodeById.html index ca90d7a..9fc062e 100644 --- a/docs/reference/getNodeById.html +++ b/docs/reference/getNodeById.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -111,7 +111,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getNumNodes.html b/docs/reference/getNumNodes.html index a14a90f..2caa285 100644 --- a/docs/reference/getNumNodes.html +++ b/docs/reference/getNumNodes.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -107,7 +107,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getParDiffForest.html b/docs/reference/getParDiffForest.html index 57240bb..6c46131 100644 --- a/docs/reference/getParDiffForest.html +++ b/docs/reference/getParDiffForest.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -88,7 +88,7 @@

    Arguments

    measure

    a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. test" gives the +differences divided by their pooled standard errors. test" gives the contributions of the parameters to the test statistic. "raw" gives the absolute values of the parameter differences.

    @@ -127,7 +127,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getParDiffTree.html b/docs/reference/getParDiffTree.html index bf9e385..2554209 100644 --- a/docs/reference/getParDiffTree.html +++ b/docs/reference/getParDiffTree.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -88,7 +88,7 @@

    Arguments

    measure

    a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. "test" gives the +differences divided by their pooled standard errors. "test" gives the contributions of the parameters to the test statistic."raw" gives the absolute values of the parameter differences.

    @@ -126,7 +126,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/getTerminalNodes.html b/docs/reference/getTerminalNodes.html index 11bd6c9..826922e 100644 --- a/docs/reference/getTerminalNodes.html +++ b/docs/reference/getTerminalNodes.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -107,7 +107,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/index.html b/docs/reference/index.html index aef3e7f..f4d6953 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -76,6 +76,10 @@

    All functions biodiversity()

    Quantify bio diversity of a SEM Forest

    + +

    boruta()

    + +

    BORUTA algorithm for SEM trees

    coef(<semtree>)

    @@ -209,14 +213,22 @@

    All functions se()

    SEMtrees Parameter Estimates Standard Error Table

    + +

    semforest()

    + +

    Create a SEM Forest

    semforest.control()

    SEM Forest Control Object

    -

    semforest()

    +

    .SCALE_METRIC

    -

    Create a SEM Forest

    +

    SEM Tree Package

    + +

    semtree()

    + +

    SEM Tree: Recursive Partitioning for Structural Equation Models

    semtree.constraints()

    @@ -225,10 +237,6 @@

    All functions semtree.control()

    SEM Tree Control Object

    - -

    semtree()

    - -

    SEM Tree: Recursive Partitioning for Structural Equation Models

    strip()

    @@ -262,7 +270,7 @@

    All functions
    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/isLeaf.html b/docs/reference/isLeaf.html index 9b6257a..1704863 100644 --- a/docs/reference/isLeaf.html +++ b/docs/reference/isLeaf.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package

  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -107,7 +107,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/kl.html b/docs/reference/kl.html index 228c87f..ec87c7b 100644 --- a/docs/reference/kl.html +++ b/docs/reference/kl.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -111,7 +111,7 @@

    Arguments

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/lgcm.html b/docs/reference/lgcm.html index 8e72fd4..582e91f 100644 --- a/docs/reference/lgcm.html +++ b/docs/reference/lgcm.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -98,7 +98,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/merge.semforest.html b/docs/reference/merge.semforest.html index ffae20a..c0b76d5 100644 --- a/docs/reference/merge.semforest.html +++ b/docs/reference/merge.semforest.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -120,7 +120,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/modelEstimates.html b/docs/reference/modelEstimates.html index 993a526..22c378e 100644 --- a/docs/reference/modelEstimates.html +++ b/docs/reference/modelEstimates.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -111,7 +111,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/outliers.html b/docs/reference/outliers.html index d86054e..7b203e5 100644 --- a/docs/reference/outliers.html +++ b/docs/reference/outliers.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -111,7 +111,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/parameters.html b/docs/reference/parameters.html index be10f39..351eac2 100644 --- a/docs/reference/parameters.html +++ b/docs/reference/parameters.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -133,7 +133,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/partialDependence.html b/docs/reference/partialDependence.html index eed3c02..0c6c4de 100644 --- a/docs/reference/partialDependence.html +++ b/docs/reference/partialDependence.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -121,7 +121,7 @@

    Arguments

    mc

    Integer. If mc is not NULL, the function will sample -mc number of rows from data with replacement, to estimate +mc number of rows from data with replacement, to estimate marginal dependency using Monte Carlo integration. This is less computationally expensive.

    @@ -152,7 +152,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/partialDependence_data.html b/docs/reference/partialDependence_data.html index 98459b2..84b24b0 100644 --- a/docs/reference/partialDependence_data.html +++ b/docs/reference/partialDependence_data.html @@ -19,7 +19,7 @@ semtree - 0.9.19 + 0.9.20 @@ -43,7 +43,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -143,7 +143,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/partialDependence_growth.html b/docs/reference/partialDependence_growth.html index 94eb68d..2ff2602 100644 --- a/docs/reference/partialDependence_growth.html +++ b/docs/reference/partialDependence_growth.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -123,7 +123,7 @@

    Arguments

    mc

    Integer. If mc is not NULL, the function will sample -mc number of rows from data with replacement, to estimate +mc number of rows from data with replacement, to estimate marginal dependency using Monte Carlo integration. This is less computationally expensive.

    @@ -166,7 +166,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/plotParDiffForest.html b/docs/reference/plotParDiffForest.html index 6770ad8..bfeda12 100644 --- a/docs/reference/plotParDiffForest.html +++ b/docs/reference/plotParDiffForest.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -101,7 +101,7 @@

    Arguments

    measure

    a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. "test" gives the +differences divided by their pooled standard errors. "test" gives the contributions of the parameters to the test statistic. "raw" gives the absolute values of the parameter differences.

    @@ -137,7 +137,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/plotParDiffTree.html b/docs/reference/plotParDiffTree.html index 868c6a4..ad13d74 100644 --- a/docs/reference/plotParDiffTree.html +++ b/docs/reference/plotParDiffTree.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -100,7 +100,7 @@

    Arguments

    measure

    a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. "test" gives the +differences divided by their pooled standard errors. "test" gives the contributions of the parameters to the test statistic. "raw" gives the absolute values of the parameter differences.

    @@ -137,7 +137,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/plotTreeStructure.html b/docs/reference/plotTreeStructure.html index fa29c2d..e567f38 100644 --- a/docs/reference/plotTreeStructure.html +++ b/docs/reference/plotTreeStructure.html @@ -19,7 +19,7 @@ semtree - 0.9.19 + 0.9.20 @@ -43,7 +43,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -118,7 +118,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/predict.semforest.html b/docs/reference/predict.semforest.html index 83d8f09..75bd082 100644 --- a/docs/reference/predict.semforest.html +++ b/docs/reference/predict.semforest.html @@ -17,7 +17,7 @@ semtree - 0.9.19 + 0.9.20 @@ -41,7 +41,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -121,7 +121,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/proximity.html b/docs/reference/proximity.html index 454f9c7..3ebf7a4 100644 --- a/docs/reference/proximity.html +++ b/docs/reference/proximity.html @@ -20,7 +20,7 @@ semtree - 0.9.19 + 0.9.20 @@ -44,7 +44,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -147,7 +147,7 @@

    Examples

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/prune.html b/docs/reference/prune.html index efb0f92..b260d99 100644 --- a/docs/reference/prune.html +++ b/docs/reference/prune.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -133,7 +133,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/se.html b/docs/reference/se.html index 43e8fb2..f2bd3bb 100644 --- a/docs/reference/se.html +++ b/docs/reference/se.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -133,7 +133,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/semforest.control.html b/docs/reference/semforest.control.html index 63de354..434c854 100644 --- a/docs/reference/semforest.control.html +++ b/docs/reference/semforest.control.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -131,7 +131,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/semforest.html b/docs/reference/semforest.html index 6f681fa..d392d0a 100644 --- a/docs/reference/semforest.html +++ b/docs/reference/semforest.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -149,7 +149,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/semtree-package.html b/docs/reference/semtree-package.html new file mode 100644 index 0000000..76faa80 --- /dev/null +++ b/docs/reference/semtree-package.html @@ -0,0 +1,109 @@ + +SEM Tree Package — semtree-package • semtree + + +
    +
    + + + +
    +
    + + +
    +

    SEM Tree Package

    +
    + +
    +
    .SCALE_METRIC
    +
    + +
    +

    Format

    +

    An object of class numeric of length 1.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.6.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/semtree.constraints.html b/docs/reference/semtree.constraints.html index cb5b12f..4a16649 100644 --- a/docs/reference/semtree.constraints.html +++ b/docs/reference/semtree.constraints.html @@ -22,7 +22,7 @@ semtree - 0.9.19 + 0.9.20 @@ -46,7 +46,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -138,7 +138,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/semtree.control.html b/docs/reference/semtree.control.html index ddbc714..6df300c 100644 --- a/docs/reference/semtree.control.html +++ b/docs/reference/semtree.control.html @@ -31,7 +31,7 @@ semtree - 0.9.19 + 0.9.20 @@ -55,7 +55,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -104,7 +104,7 @@

    SEM Tree Control Object

    semtree.control(
    -  method = "naive",
    +  method = c("naive", "score", "fair", "fair3"),
       min.N = 20,
       max.depth = NA,
       alpha = 0.05,
    @@ -128,7 +128,8 @@ 

    SEM Tree Control Object

    strucchange.from = 0.15, strucchange.to = NULL, strucchange.nrep = 50000, - refit = TRUE + refit = TRUE, + ctsem_sd = FALSE )
    @@ -242,7 +243,7 @@

    Arguments

    use.maxlm
    -

    Use MaxLm statistic

    +

    Use MaxLR statistic for split point selection (as proposed by Arnold et al., 2021)

    strucchange.from
    @@ -263,6 +264,11 @@

    Arguments

    If TRUE (default) the initial model is fitted on the data provided to semtree.

    + +
    ctsem_sd
    +

    If FALSE (default) no standard errors of CT model parameters +are computed. Requesting standard errors increases runtime.

    +

    Value

    @@ -275,6 +281,7 @@

    References

    Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, U. (2013). Structural equation model trees. Psychological Methods, 18(1), 71-86.

    +

    Arnold, M., Voelkle, M. C., & Brandmaier, A. M. (2021). Score-guided structural equation model trees. Frontiers in Psychology, 11, Article 564403. https://doi.org/10.3389/fpsyg.2020.564403

    See also

    @@ -327,7 +334,7 @@

    Examples

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/semtree.html b/docs/reference/semtree.html index 6eb83f8..d773752 100644 --- a/docs/reference/semtree.html +++ b/docs/reference/semtree.html @@ -22,7 +22,7 @@ semtree - 0.9.19 + 0.9.20 @@ -46,7 +46,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -206,7 +206,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/strip.html b/docs/reference/strip.html index 557baee..3fdd227 100644 --- a/docs/reference/strip.html +++ b/docs/reference/strip.html @@ -19,7 +19,7 @@ semtree - 0.9.19 + 0.9.20 @@ -43,7 +43,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -133,7 +133,7 @@

    Examples

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/subforest.html b/docs/reference/subforest.html index 6f51e04..1a56b3c 100644 --- a/docs/reference/subforest.html +++ b/docs/reference/subforest.html @@ -19,7 +19,7 @@ semtree - 0.9.19 + 0.9.20 @@ -43,7 +43,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -124,7 +124,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/subtree.html b/docs/reference/subtree.html index 1df7dc7..a7c7a16 100644 --- a/docs/reference/subtree.html +++ b/docs/reference/subtree.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -139,7 +139,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/toTable.html b/docs/reference/toTable.html index d33d3b5..12f3f0a 100644 --- a/docs/reference/toTable.html +++ b/docs/reference/toTable.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -87,11 +87,11 @@

    Arguments

    added.param.cols
    -

    Add extra columns with parameter estimates.

    +

    String. Add extra columns with parameter estimates. Pass a vector with the names of the parameters that should be rendered in the table.

    round.param
    -

    Number of digits to round parameter estiamtes

    +

    Integer. Number of digits to round parameter estimates. Default is no rounding (NULL)

    @@ -118,7 +118,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/reference/varimp.html b/docs/reference/varimp.html index 25e3739..55095aa 100644 --- a/docs/reference/varimp.html +++ b/docs/reference/varimp.html @@ -18,7 +18,7 @@ semtree - 0.9.19 + 0.9.20 @@ -42,7 +42,7 @@ Getting Started with the semtree package
  • - Score-based Tests + SEM Trees with score-based tests
  • Focus parameters in SEM forests @@ -145,7 +145,7 @@

    Author

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.6.

    diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 389ed65..56460eb 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,6 +3,15 @@ /404.html + + /CONTRIBUTE.html + + + /CONTRIBUTORS.html + + + /LICENSE.html + /articles/constraints.html @@ -25,23 +34,20 @@ /authors.html - /CONTRIBUTE.html - - - /CONTRIBUTORS.html + /cran_comments.html /index.html - - /LICENSE.html - /news/index.html /reference/biodiversity.html + + /reference/boruta.html + /reference/coef.semtree.html @@ -159,6 +165,9 @@ /reference/semforest.html + + /reference/semtree-package.html + /reference/semtree.constraints.html diff --git a/man/computePval_maxLR.Rd b/man/computePval_maxLR.Rd index 0643ccb..3de00dc 100644 --- a/man/computePval_maxLR.Rd +++ b/man/computePval_maxLR.Rd @@ -17,14 +17,14 @@ measurement from the covariate and the bin size for ordinal and categorical covariates.} \item{from}{numeric from interval (0, 1) specifying start of trimmed -sample period. With the default +sample period. With the default from = 0.15 the first and last 15 percent of observations are trimmed. This is only needed for continuous covariates.} \item{to}{numeric from interval (0, 1) specifying end of trimmed sample period. By default, to is 1.} -\item{nrep}{numeric. Number of replications used for simulating from the asymptotic +\item{nrep}{numeric. Number of replications used for simulating from the asymptotic distribution (passed to efpFunctional). Only needed for ordinal covariates.} } diff --git a/man/getParDiffForest.Rd b/man/getParDiffForest.Rd index 1d2711e..12ebbbc 100644 --- a/man/getParDiffForest.Rd +++ b/man/getParDiffForest.Rd @@ -10,7 +10,7 @@ getParDiffForest(forest, measure = "wald", normalize = FALSE) \item{forest}{a semforest object.} \item{measure}{a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. test" gives the +differences divided by their pooled standard errors. test" gives the contributions of the parameters to the test statistic. "raw" gives the absolute values of the parameter differences.} diff --git a/man/getParDiffTree.Rd b/man/getParDiffTree.Rd index a0fa732..7e1f0cd 100644 --- a/man/getParDiffTree.Rd +++ b/man/getParDiffTree.Rd @@ -10,7 +10,7 @@ getParDiffTree(tree, measure = "wald", normalize = FALSE) \item{tree}{a semtree object.} \item{measure}{a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. "test" gives the +differences divided by their pooled standard errors. "test" gives the contributions of the parameters to the test statistic."raw" gives the absolute values of the parameter differences.} diff --git a/man/partialDependence.Rd b/man/partialDependence.Rd index c8e9bed..18a5226 100644 --- a/man/partialDependence.Rd +++ b/man/partialDependence.Rd @@ -35,7 +35,7 @@ variables named in \code{reference.var}.} dependence values; for example, the mean and +/- 1SD of \code{reference.var}.} \item{mc}{Integer. If \code{mc} is not \code{NULL}, the function will sample -\code{mc} number of rows from \code{data} with replacement, to estimate +\code{mc} number of rows from \code{data} with replacement, to estimate marginal dependency using Monte Carlo integration. This is less computationally expensive.} diff --git a/man/partialDependence_growth.Rd b/man/partialDependence_growth.Rd index 4172cf2..dbf0031 100644 --- a/man/partialDependence_growth.Rd +++ b/man/partialDependence_growth.Rd @@ -37,7 +37,7 @@ variables named in \code{reference.var}.} dependence values; for example, the mean and +/- 1SD of \code{reference.var}.} \item{mc}{Integer. If \code{mc} is not \code{NULL}, the function will sample -\code{mc} number of rows from \code{data} with replacement, to estimate +\code{mc} number of rows from \code{data} with replacement, to estimate marginal dependency using Monte Carlo integration. This is less computationally expensive.} diff --git a/man/plotParDiffForest.Rd b/man/plotParDiffForest.Rd index f72027d..e5d3952 100644 --- a/man/plotParDiffForest.Rd +++ b/man/plotParDiffForest.Rd @@ -21,7 +21,7 @@ are "boxplot" (default) and "jitter" for a jittered strip plot with mean and standard deviation.} \item{measure}{a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. "test" gives the +differences divided by their pooled standard errors. "test" gives the contributions of the parameters to the test statistic. "raw" gives the absolute values of the parameter differences.} diff --git a/man/plotParDiffTree.Rd b/man/plotParDiffTree.Rd index c3a055f..8b42d0e 100644 --- a/man/plotParDiffTree.Rd +++ b/man/plotParDiffTree.Rd @@ -20,7 +20,7 @@ plotParDiffTree( are "ballon" (default), "heatmap", and "bar".} \item{measure}{a character. "wald" (default) gives the squared parameter -differences devided by their pooled standard errors. "test" gives the +differences divided by their pooled standard errors. "test" gives the contributions of the parameters to the test statistic. "raw" gives the absolute values of the parameter differences.} diff --git a/man/semforest.control.Rd b/man/semforest.control.Rd index 0a0d309..a523105 100644 --- a/man/semforest.control.Rd +++ b/man/semforest.control.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/semforest.control.R \name{semforest.control} \alias{semforest.control} +\alias{semforest_control} \alias{print.semforest.control} \alias{semforest_score_control} \title{SEM Forest Control Object} diff --git a/man/semtree-package.Rd b/man/semtree-package.Rd new file mode 100644 index 0000000..04d5fa4 --- /dev/null +++ b/man/semtree-package.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/semtree-package.R +\docType{data} +\name{semtree-package} +\alias{semtree-package} +\alias{.SCALE_METRIC} +\title{SEM Tree Package} +\format{ +An object of class \code{numeric} of length 1. +} +\usage{ +.SCALE_METRIC +} +\description{ +SEM Tree Package +} +\keyword{datasets} diff --git a/man/semtree.Rd b/man/semtree.Rd index 7069857..4cbeea0 100644 --- a/man/semtree.Rd +++ b/man/semtree.Rd @@ -92,7 +92,7 @@ split from this node for further testing. 3. "score" uses score-based test statistics. These statistics are much faster than the classic SEM tree approach while having favorable -statistical properties. +statistical properties. All other parameters controlling the tree growing process are available through a separate \code{\link{semtree.control}} object. diff --git a/man/semtree.control.Rd b/man/semtree.control.Rd index 17bb8ac..7ce8847 100644 --- a/man/semtree.control.Rd +++ b/man/semtree.control.Rd @@ -3,10 +3,11 @@ \name{semtree.control} \alias{semtree.control} \alias{print.semtree.control} +\alias{semtree_control} \title{SEM Tree Control Object} \usage{ semtree.control( - method = "naive", + method = c("naive", "score", "fair", "fair3"), min.N = 20, max.depth = NA, alpha = 0.05, @@ -102,7 +103,7 @@ set to one, bonferroni correction counts the number of variables tested.} \item{missing}{Missing value treatment. Default is ignore} -\item{use.maxlm}{Use MaxLm statistic} +\item{use.maxlm}{Use MaxLR statistic for split point selection (as proposed by Arnold et al., 2021)} \item{strucchange.from}{Strucchange argument. See their package documentation.} @@ -156,6 +157,8 @@ implements modern score-based statistics. Brandmaier, A.M., Oertzen, T. v., McArdle, J.J., & Lindenberger, U. (2013). Structural equation model trees. \emph{Psychological Methods}, 18(1), 71-86. + +Arnold, M., Voelkle, M. C., & Brandmaier, A. M. (2021). Score-guided structural equation model trees. \emph{Frontiers in Psychology}, 11, Article 564403. https://doi.org/10.3389/fpsyg.2020.564403 } \seealso{ \code{\link{semtree}} diff --git a/misc/debug_importance.R b/misc/debug_importance.R new file mode 100644 index 0000000..0419403 --- /dev/null +++ b/misc/debug_importance.R @@ -0,0 +1,117 @@ +# +# this is code to debug variable importance estimates for +# focus parameters +# +# here, there are three predictors and two model parameters +# x1 and x2 predict differences in mux +# x3 predicts differences in varx +# +# the effect of x1 is half the size of x2 +# +# if additional variables (x4, x5, ...) are generated, +# they have no effect on the outcome +# + + +library(partykit) +library(semtree) +library(tictoc) + +num_noise <- 1 + +set.seed(123) +N <- 400 +x1 <- rbinom(N, size = 1, prob=.5) +x2 <- rbinom(N, size = 1, prob=.5) +x3 <- rbinom(N, size = 1, prob=.5) + +y <- rnorm(N, x1+0.5*x2, sd=(2*x3+1)) + +x1 <- factor(x1) +x2 <- factor(x2) +x3 <- factor(x3) + +sim.data <- data.frame(y,x1,x2,x3) + +if (num_noise>0) +for (i in 1:num_noise) { + xnoi <- factor(rbinom(N, size = 1, prob=.5)) + sim.data <- cbind(sim.data, xnoi) +} + +names(sim.data) <- c("y", paste0("x",1:(ncol(sim.data)-1))) + +manifests <- "y" +observed.model <- mxModel("Linear Growth Curve Model Path Specification", + type="RAM", + mxData( + sim.data, + type="raw" + ), + manifestVars=manifests, + latentVars=c(), + # variance + mxPath( + from=manifests, + arrows=2, + free=TRUE, + values = c(1), + labels=c("varx") + ), + + # means + mxPath( + from="one", + to=manifests, + arrows=1, + free=TRUE, + values=c(0), + labels=c("mux") + ) +) # close model + +#tree <- semtree(model = observed.model, data=sim.data,control = semtree.control(method="score")) + +ctrl <- semforest_score_control(num.trees=200) + +ctrl <- semforest.control(num.trees=200) +ctrl$semtree.control$method <- "score" + +tic() + +#parallel::makeCluster(7) +library(future) +plan(multisession, workers=15) +#plan(sequential) +frst <- semforest(model = observed.model, data=sim.data, + control = ctrl, + constraints = semtree.constraints(focus.parameters = "mux")) +#parallel::stopCluster() + +vim <- semtree::varimp(frst) +vimp <- semtree::varimp(frst,method="permutationFocus") + +plot(vimp) + +frst2 <- semforest(model = observed.model, data=sim.data, + control = ctrl, + constraints = semtree.constraints(focus.parameters = "varx")) +vimp2 <- semtree::varimp(frst2,method="permutationFocus") + +toc() + +plot(vimp2) +plot(vimp) + +semtree:::varimpTree( frst$forest[[5]], frst$forest.data[[5]],method = "permutationFocus",var.names = "x2" ) + +#semtree:::varimpFocus( frst$forest[[5]], frst$forest.data[[5]], cov.name = "x1") + +#debug(semtree:::varimpFocus) + +# plot(vimp) should show that there is an effect of x1 and (with half the effect size) an +# effect of x2 +plot(vimp) + +# plot(vimp2) should show that there is only an effect of x3 +plot(vimp2) diff --git a/misc/generate_data.R b/misc/generate_data.R new file mode 100644 index 0000000..2e8fedb --- /dev/null +++ b/misc/generate_data.R @@ -0,0 +1,17 @@ +# +# generate noise variables +# +N <- 1000 +noise1 <- sample(c(0,1),size=N, replace=TRUE) +noise2 <- sample(c(0,1,2,3),size=N, replace=TRUE) +noise3 <- sample(1:10,size=N, replace=TRUE) +noise4 <- rnorm(N) + +pred1 <- rnorm(N) +pred2 <- ifelse(pred1+rnorm(N,0,0.1)>0,1,0) +pred3 <- rnorm(N) + +latvar <- ifelse(pred1>0, 0, 5)+rnorm(N) + +obs <- latvar %*% t(c(0.8,0.9,0.7)) +obs <- obs + rnorm(N*3,0,0.1) diff --git a/misc/ice_tests.R b/misc/ice_tests.R new file mode 100644 index 0000000..89a46a6 --- /dev/null +++ b/misc/ice_tests.R @@ -0,0 +1,49 @@ +ice.semforest_stripped <- function(x, data, reference.var, + support = 20, points = NULL, mc = NULL, FUN = "median", ...){ + #browser() + cl <- match.call() + cl <- cl[c(1L, which(names(cl) %in% c("data", "reference.var", "support", "points", "mc") + ))] + cl[[1L]] <- str2lang("semtree:::partialDependence_data") + mp <- eval.parent(cl) + preds <- data.table::data.table(predict(x, data = mp, type = "pars")) + mp[,names(mp)[-which(names(mp) %in% c(reference.var, colnames(preds)))]:=NULL] + mp <- cbind(mp, preds) + mp$ID <- rep(1:nrow(data), each=support) + return(mp) +} + +set.seed(325) +N <- 200 +pred1 <- factor(sample(c("red","green","blue"),N,replace=TRUE)) +pred2 <- ordered(sample(c(0,1,2),N,replace=TRUE)) +pred3 <- as.numeric(sample(1:20,size = N,replace=TRUE)) +noisy <- rnorm(N) +noisy2 <- rnorm(N) +noisy3 <- rnorm(N) + +x <- rnorm(N) +#x <- rnorm(N)+ifelse(pred2=="1",10,0) +x <- x + pred3/10 +df <- data.frame(x, pred3,noisy,noisy2,noisy3) + +model = "x ~~ var*x; x~ mu*0" +fitted_model <- lavaan(model, df) +forst = semforest(fitted_model, df, + control=semforest.control(control=semtree.control(method="score", + verbose=FALSE,report.level=99,alpha = 1),num.trees = 100)) + + + +# should be: about x=1 for pred3=.1 and x=20 for pred3=2 + +pd = partialDependence(forst, reference.var="pred3") +plot(pd, parameter="mu") + +mp=ice.semforest_stripped(forest, forest$data, reference.var="pred3") +mp + +ggplot(mp, aes(x=pred3,y=mu, group=factor(ID)))+geom_line() + + +plot(pd,parameter="mu") diff --git a/tests/control.R b/tests/control.R new file mode 100644 index 0000000..d7b4465 --- /dev/null +++ b/tests/control.R @@ -0,0 +1,94 @@ +require(semtree) + + +data(lgcm) + +lgcm$agegroup <- ordered(lgcm$agegroup, labels=c("young","old") ) +lgcm$training <- as.factor(lgcm$training) +lgcm$noise <- as.factor(lgcm$noise) + +# LOAD IN OPENMX MODEL. +# A SIMPLE LINEAR GROWTH MODEL WITH 5 TIME POINTS FROM SIMULATED DATA + +manifests <- names(lgcm)[1:5] +lgcModel <- mxModel("Linear Growth Curve Model Path Specification", + type="RAM", + manifestVars=manifests, + latentVars=c("intercept","slope"), + # residual variances + mxPath( + from=manifests, + arrows=2, + free=TRUE, + values = c(1, 1, 1, 1, 1), + labels=c("residual1","residual2","residual3","residual4","residual5") + ), + # latent variances and covariance + mxPath( + from=c("intercept","slope"), + connect="unique.pairs", + arrows=2, + free=TRUE, + values=c(1, 1, 1), + labels=c("vari", "cov", "vars") + ), + # intercept loadings + mxPath( + from="intercept", + to=manifests, + arrows=1, + free=FALSE, + values=c(1, 1, 1, 1, 1) + ), + # slope loadings + mxPath( + from="slope", + to=manifests, + arrows=1, + free=FALSE, + values=c(0, 1, 2, 3, 4) + ), + # manifest means + mxPath( + from="one", + to=manifests, + arrows=1, + free=FALSE, + values=c(0, 0, 0, 0, 0) + ), + # latent means + mxPath( + from="one", + to=c("intercept", "slope"), + arrows=1, + free=TRUE, + values=c(1, 1), + labels=c("meani", "means") + ), + mxData(lgcm,type="raw") +) + + +# TREE CONTROL OPTIONS. +# TO OBTAIN BASIC/DEFAULT SMETREE OPTIONS, SIMPLY TPYE THE FOLLOWING: + +controlOptions <- semtree.control(method = "naive",max.depth = 0,min.N=NULL, + min.bucket=NULL) + +# RUN TREE. + +tree <- semtree(model=lgcModel, data=lgcm, control = controlOptions) + +stopifnot(tree$control$min.N==50) +stopifnot(tree$control$min.bucket==25) + + + +x<-semtree_control() +semtree:::check.semtree.control(x) + +x<-semtree_control(min.N=100) +semtree:::check.semtree.control(x) + +x<-semtree_control(min.N=100, min.bucket=10) +semtree:::check.semtree.control(x) diff --git a/tests/lavaan.R b/tests/lavaan.R index 67ea1c1..735a320 100644 --- a/tests/lavaan.R +++ b/tests/lavaan.R @@ -15,7 +15,7 @@ data(lgcm) lgcm$agegroup <- as.ordered(lgcm$agegroup) lgcm$training <- as.factor(lgcm$training) -lgcm$noise <- as.numeric(lgcm$noise) +lgcm$noise <- as.factor(lgcm$noise) # LOAD IN LAVAAN MODEL. # A SIMPLE LINEAR GROWTH MODEL WITH 5 TIME POINTS FROM SIMULATED DATA @@ -37,7 +37,7 @@ lgcModel <- lavaan(lgcModelstr, lgcm, do.fit=TRUE) # TREE CONTROL OPTIONS. # TO OBTAIN BASIC/DEFAULT SMETREE OPTIONS, SIMPLY TPYE THE FOLLOWING: -controlOptions <- semtree.control() +controlOptions <- semtree.control(method="score") # THE CONTENTS OF THE DEFAULT CONTROLS CAN THEN BE VIEWED. @@ -92,6 +92,4 @@ treeSub <- subtree(tree, startNode=3) controlOptions$method <- "fair" tree2 <- semtree(model=lgcModel, data=lgcm, control = controlOptions) -# disabled for time restrictions on CRAN -#controlOptions$method <- "cv" -#tree3 <- semtree(model=lgcModel, data=lgcm, control = controlOptions) +toTable(tree2) diff --git a/tests/testthat/definitionvariable.R b/tests/testthat/definitionvariable.R index 9cdbc5e..2f48109 100644 --- a/tests/testthat/definitionvariable.R +++ b/tests/testthat/definitionvariable.R @@ -1,3 +1,7 @@ + +# skip long running tests on CRAN +skip_on_cran() + # OpenMx model with definition variables # diff --git a/tests/testthat/forced_splitl.R b/tests/testthat/forced_splitl.R new file mode 100644 index 0000000..b8cbdf7 --- /dev/null +++ b/tests/testthat/forced_splitl.R @@ -0,0 +1,28 @@ +library(lavaan) +library(semtree) +set.seed(1238) + +N <- 500 + +# simulate data +da <- data.frame(y = c(rnorm(N/2, mean = -1), rnorm(N/2, mean = 1)), + z = factor(rep(c(0,1),each=N/2)),k=rnorm(N),m=rnorm(N) ) + +m_lav <- ' +y ~~ y +y ~ 1 +' + +fit_lav <- lavaan(model = m_lav, data = da) + + +tree = semtree(model=fit_lav, data=da, + control = semtree_control(method="score"), + forced_splits=NULL) + + + +tree_forced_m = semtree(model=fit_lav, data=da, + control = semtree_control(method="score"), + forced_splits=c("m")) + diff --git a/tests/invariance.R b/tests/testthat/invariance.R similarity index 100% rename from tests/invariance.R rename to tests/testthat/invariance.R diff --git a/tests/testthat/prediction.R b/tests/testthat/prediction.R index 9f05ddd..74722c0 100644 --- a/tests/testthat/prediction.R +++ b/tests/testthat/prediction.R @@ -1,3 +1,5 @@ +skip_on_cran() + N<-100 x<-rnorm(N) y<-rnorm(N) diff --git a/tests/testthat/scores.R b/tests/testthat/scores.R index 4010fad..469c8e7 100644 --- a/tests/testthat/scores.R +++ b/tests/testthat/scores.R @@ -1,3 +1,7 @@ + +# skip long running tests on CRAN +skip_on_cran() + require("semtree") data(lgcm) diff --git a/tests/testthat/test-basic-splitting.R b/tests/testthat/test-basic-splitting.R index 70f903a..5d0fd78 100644 --- a/tests/testthat/test-basic-splitting.R +++ b/tests/testthat/test-basic-splitting.R @@ -2,7 +2,7 @@ library(lavaan) # skip long running tests on CRAN -skip_on_cran() +testthat::skip_on_cran() # # test basic splitting @@ -34,7 +34,7 @@ model = "x ~~ x" fitted_model <- lavaan(model, df) tree = semtree(fitted_model, df, control=semtree.control()) test_that("result is a tree",{ expect_equal(class(tree),"semtree")}) -test_that("tree depth is more than 1", { expect_gt(getDepth(tree),3) }) +test_that("tree depth is more than 1", { expect_gt(getDepth(tree),1) }) test_that("first split is optimal", {expect_equal(tree$rule$value,"one")}) # testing unordered, named factors @@ -49,16 +49,17 @@ test_that("tree depth is at least 2", { expect_gt(getDepth(tree),1) }) test_that("first split is optimal", {expect_equal(as.character(tree$rule$value),"green")}) # testing ordered, numeric -set.seed(23334653) +set.seed(2333463) x = rnorm(n) x <- x * ifelse( (var_ordered <= 2), .5, 10) df <- data.frame(x, var_ordered) model = "x ~~ x" fitted_model <- lavaan(model, df) -tree = semtree(fitted_model, df, control=semtree.control(max.depth=3)) +tree = semtree(fitted_model, df, + control=semtree.control(max.depth=3)) plot(tree) test_that("result is a tree",{ expect_equal(class(tree),"semtree")}) -test_that("tree depth is 3", { expect_equal(getDepth(tree),3) }) +test_that("tree depth is 2", { expect_equal(getDepth(tree),2) }) test_that("split is optimal", { expect_equal(tree$caption, "var_ordered > 2")}) # testing numeric diff --git a/tests/testthat/test-compare_full_light.R b/tests/testthat/test-compare_full_light.R index efd2e08..e8e089d 100644 --- a/tests/testthat/test-compare_full_light.R +++ b/tests/testthat/test-compare_full_light.R @@ -1,8 +1,7 @@ # DEMO FOR SEMTREE require("semtree") require("lavaan") -#require("future") -#plan(sequential) + data(lgcm) lgcm$agegroup <- ordered(lgcm$agegroup) diff --git a/tests/testthat/test-dummy-split.R b/tests/testthat/test-dummy-split.R new file mode 100644 index 0000000..3893b86 --- /dev/null +++ b/tests/testthat/test-dummy-split.R @@ -0,0 +1,19 @@ +library(lavaan) + + +set.seed(458) +n <- 500 +var_unordered <- factor(sample(c("lightning","rain","sunshine","snow"),n,TRUE)) +var_grp <- factor((var_unordered %in% c("rain","sunshine"))) +x <- rnorm(n)+ifelse(var_grp==TRUE,20,0) + +# data frame has only a dummy predictor +df <- data.frame(x=x, var_grp) + +fit <- lavaan("x~~x",df) + +tree <- semtree(fit, df, control = semtree.control(method="score")) + +testthat::expect_equal(tree$rule$name, "var_grp") +testthat::expect_equal(tree$rule$value, "FALSE") + diff --git a/tests/testthat/test-forest-focus.R b/tests/testthat/test-forest-focus.R index a81fa6d..ad65c88 100644 --- a/tests/testthat/test-forest-focus.R +++ b/tests/testthat/test-forest-focus.R @@ -1,7 +1,12 @@ +# skip parallel tests on CRAN +skip_on_cran() + +if (require("future")) { + require("semtree") data(lgcm) -library(future) + future::plan(multisession, workers=5) lgcm$agegroup <- ordered(lgcm$agegroup) @@ -91,4 +96,6 @@ vim <- varimp(forest, method = "permutationFocus") #plot(vim) #plot(vim_naive) -aggregate() \ No newline at end of file +aggregate() + +} \ No newline at end of file diff --git a/tests/testthat/test-future.R b/tests/testthat/test-future.R index 013d4a6..0774d78 100644 --- a/tests/testthat/test-future.R +++ b/tests/testthat/test-future.R @@ -1,3 +1,7 @@ +skip_on_cran() + +if (require(future)) { + library(future) library(lavaan) library(semtree) @@ -33,5 +37,5 @@ testrun <- function(mode="sequential"){ #result1<-testrun("sequential") result2<-testrun("parallel") - +} diff --git a/tests/testthat/test-partial-dependence.R b/tests/testthat/test-partial-dependence.R index 5cff4ad..2df8540 100644 --- a/tests/testthat/test-partial-dependence.R +++ b/tests/testthat/test-partial-dependence.R @@ -1,3 +1,7 @@ + +# skip long running tests on CRAN +skip_on_cran() + #context("test partial dependence") library(lavaan) diff --git a/tests/testthat/test-score-forest.R b/tests/testthat/test-score-forest.R index 401dc82..dc7475b 100644 --- a/tests/testthat/test-score-forest.R +++ b/tests/testthat/test-score-forest.R @@ -1,3 +1,8 @@ + +# skip long running tests on CRAN +skip_on_cran() + + require("semtree") data(lgcm) diff --git a/tests/testthat/test-score-minN.R b/tests/testthat/test-score-minN.R index 246619f..a92eaa5 100644 --- a/tests/testthat/test-score-minN.R +++ b/tests/testthat/test-score-minN.R @@ -1,3 +1,7 @@ + +# skip long running tests on CRAN +skip_on_cran() + require("semtree") data(lgcm) diff --git a/tests/testthat/test-score-tests.R b/tests/testthat/test-score-tests.R index 6fb423e..b0306b2 100644 --- a/tests/testthat/test-score-tests.R +++ b/tests/testthat/test-score-tests.R @@ -1,17 +1,17 @@ -context("test basic splitting based on level of covariate with score tests") +#context("test basic splitting based on level of covariate with score tests") + +# skip long running tests on CRAN +testthat::skip_on_cran() library(lavaan) library(semtree) -# skip long running tests on CRAN -skip_on_cran() - # generate observations of an ordered factor with labels set.seed(458) n <- 1000 var_unordered <- factor(sample(c("lightning","rain","sunshine","snow"),n,TRUE)) x <- rnorm(n)+ifelse(var_unordered=="rain",20,0) -x <- x+ifelse(var_unordered=="sunshine",40,0) +x <- x+ifelse(var_unordered=="sunshine",20,0) df <- data.frame(x, var_unordered) model = "x ~~ x; x ~mu*1" diff --git a/tests/testthat/test-semforest-focus-bivariate-2.R b/tests/testthat/test-semforest-focus-bivariate-2.R index b32fa3e..b90ccb2 100644 --- a/tests/testthat/test-semforest-focus-bivariate-2.R +++ b/tests/testthat/test-semforest-focus-bivariate-2.R @@ -1,3 +1,7 @@ +skip_on_cran() + +if (require(future)) { + N <- 2000 library(semtree) @@ -52,5 +56,8 @@ vim_focus <- varimp(forest, method="permutationFocus") vimdat <- data.frame( vim=rep(c("naive","focus"),each=3), param=rep(c("pred1","pred2","noise"),2), vals=c( semtree:::aggregateVarimp(vim_naive),semtree:::aggregateVarimp(vim_focus))) -library(tidyverse) +library(ggplot2) +library(tidyr) vimdat %>% ggplot(aes(x=vim,y=vals,group=param,fill=param))+geom_col(position="dodge") + +} \ No newline at end of file diff --git a/tests/testthat/test-semforest-focus-bivariate-lavaan.R b/tests/testthat/test-semforest-focus-bivariate-lavaan.R index a11820d..bab6998 100644 --- a/tests/testthat/test-semforest-focus-bivariate-lavaan.R +++ b/tests/testthat/test-semforest-focus-bivariate-lavaan.R @@ -1,3 +1,7 @@ +skip_on_cran() + +if (require(future)) { + N <- 2000 library(semtree) @@ -66,6 +70,8 @@ vimdat <- data.frame( vim=rep(c("naive","focus"),each=3), vals=c( semtree:::aggregateVarimp(vim_naive),semtree:::aggregateVarimp(vim_focus))) return(vimdat) } -#library(tidyverse) + #vimdat %>% ggplot(aes(x=vim,y=vals,group=param,fill=param))+geom_col(position="dodge") rs <- sapply( c( model_lav, model_omx), FUN=sim) + +} \ No newline at end of file diff --git a/tests/testthat/test-semforest-focus-bivariate.R b/tests/testthat/test-semforest-focus-bivariate.R index 13ccd78..4ae9b8e 100644 --- a/tests/testthat/test-semforest-focus-bivariate.R +++ b/tests/testthat/test-semforest-focus-bivariate.R @@ -1,3 +1,7 @@ +skip_on_cran() + +if (require(future)) { + N <- 2000 library(semtree) @@ -52,5 +56,8 @@ vim_focus <- varimp(forest, method="permutationFocus") vimdat <- data.frame( vim=rep(c("naive","focus"),each=3), param=rep(c("pred1","pred2","noise"),2), vals=c( semtree:::aggregateVarimp(vim_naive),semtree:::aggregateVarimp(vim_focus))) -library(tidyverse) +library(ggplot2) +library(tidyr) vimdat %>% ggplot(aes(x=vim,y=vals,group=param,fill=param))+geom_col(position="dodge") + +} \ No newline at end of file diff --git a/tests/testthat/test-semtree_control.R b/tests/testthat/test-semtree_control.R new file mode 100644 index 0000000..4a35729 --- /dev/null +++ b/tests/testthat/test-semtree_control.R @@ -0,0 +1,28 @@ +skip_on_cran() + + +testthat::test_that("control object is created and checked correctly", { + ctrl <- semtree::semtree.control(min.N = NULL) + chck <- semtree:::check.semtree.control(ctrl) + expect_true(chck) +} ) + + +testthat::test_that("", { + + library(semtree) + library(lavaan) + + n <- 1000 + + x <- rnorm(n) + + # data frame has only a dummy predictor + df <- data.frame(x=x) + + fit <- lavaan("x~~x",df) + + ctrl <- semforest_control(mtry=NULL) + + semforest(model, data, control=ctrl) +}) diff --git a/tests/testthat/test-single-predictor.R b/tests/testthat/test-single-predictor.R new file mode 100644 index 0000000..788ae84 --- /dev/null +++ b/tests/testthat/test-single-predictor.R @@ -0,0 +1,41 @@ +library(lavaan) +library(semtree) +set.seed(1238) + +N <- 500 + +# simulate data with Cohen's d = 2 +da <- data.frame(y = c(rnorm(N/2, mean = -1), rnorm(N/2, mean = 1)), + z = rep(c(0,1),each=N/2) ) + +m_lav <- ' +y ~~ y +y ~ 1 +' + +m_lav_constrained <- ' +y ~~ y +y ~ c(a,a)*1 +' + +####Testing semtree with lavaan models #### +fit_lav <- lavaan(model = m_lav, data = Data) + +forest <- semforest(model=fit_lav, data=da, + control = semforest.control( + num.trees = 50, control=semtree_control(method="score"))) + +vim <- varimp(forest) + +plot(vim) + +zimp <- semtree:::aggregateVarimp(vim) + +fit_lav_multigroup <- lavaan(model = m_lav, data = da,group = "z") +fit_lav_multigroup2 <- lavaan(model = m_lav_constrained, data = da,group = "z") +lrt <- anova(fit_lav_multigroup,fit_lav_multigroup2) +chi2 <- lrt$`Chisq diff`[2] + + +cat("Importance: ", zimp,"\n") +cat("MG Chi^2: ",chi2,"\n") diff --git a/tests/testthat/test-splitting-unordered.R b/tests/testthat/test-splitting-unordered.R index 4fbd4f3..4abe037 100644 --- a/tests/testthat/test-splitting-unordered.R +++ b/tests/testthat/test-splitting-unordered.R @@ -1,5 +1,3 @@ -#context("Testing unordered variable splits") - skip_on_cran() library(lavaan) diff --git a/tests/testthat/test-traverse.R b/tests/testthat/test-traverse.R index 79cb47d..78acae8 100644 --- a/tests/testthat/test-traverse.R +++ b/tests/testthat/test-traverse.R @@ -1,60 +1,75 @@ +test_that("traversal works correctly", { + library(lavaan) + + set.seed(5035) + N <- 200 -library(lavaan) + x <- rnorm(N) + y <- rnorm(N) -skip_on_cran() + pred1 <- ordered(sample(c(0, 1, 2), N, replace = TRUE)) + pred2 <- sample(0:10, N, replace = TRUE) -set.seed(5035) + y2 <- ifelse(pred2 > 5, 0.2 * y + 0.8 * x, x) -N <- 200 + model <- "x~~y; x~~x;y~~y" -x <- rnorm(N) -y <- rnorm(N) + df <- data.frame(x, y = y2, pred1, pred2) -pred1 <- ordered( sample(c(0,1,2),N,replace=TRUE) ) -pred2 <- sample(0:10, N, replace=TRUE) + fitted_model <- lavaan::lavaan(model, df) + tree <- semtree(fitted_model, df) + plot(tree) -y2 <- ifelse(pred2>5,0.2*y+0.8*x,x) + node_ids <- semtree:::traverse(tree, df) -model <- "x~~y; x~~x;y~~y" + node_ids_correct_top10 <- c(6, 2, 5, 2, 6, 2, 2, 7, 2, 2) -df <- data.frame(x,y=y2,pred1,pred2) -fitted_model <- lavaan(model,df) -tree <- semtree(fitted_model, df) -plot(tree) + expect_equal(node_ids[1:10], node_ids_correct_top10) +}) -node_ids = semtree:::traverse(tree, df) -node_ids_correct_top10 <- c(6,2,5,2,6,2,2,7,2,2) +test_that("traversal works correctly", { + library(lavaan) + + N <- 200 + + + set.seed(5035) + x <- rnorm(N) + y <- rnorm(N) + # + # further test for categorical variables + set.seed(3433) + pred3 <- factor(sample(c("red", "green", "blue", "yellow"), N, replace = TRUE)) + y2 <- ifelse(pred3 == "red" | pred3 == "yellow", 0.2 * y + 0.8 * x, 0.8 * y + 0.2 * x) -test_that("traversal works correctly", { expect_equal(node_ids[1:10], node_ids_correct_top10)}) + model <- "x~~y; x~~x;y~~y" -# -# further test for categorical variables -set.seed(3433) -pred3 <- factor(sample(c("red","green","blue","yellow"), N, replace=TRUE)) -y2 <- ifelse(pred3=="red" | pred3=="yellow",0.2*y+0.8*x,0.8*y+0.2*x) + df <- data.frame(x, y = y2, pred3) -model <- "x~~y; x~~x;y~~y" + fitted_model <- lavaan::lavaan(model, df) + tree <- semtree(fitted_model, df) + plot(tree) -df <- data.frame(x,y=y2,pred3) + node_ids <- semtree:::traverse(tree, df) -fitted_model <- lavaan(model,df) -tree <- semtree(fitted_model, df) -plot(tree) + node_ids_correct_top10 <- c(2, 2, 3, 2, 2, 2, 2, 2, 3, 2) -node_ids = semtree:::traverse(tree, df) -node_ids_correct_top10 <- c(2,2,3,2,2,2,2,2,3,2) + expect_equal( + node_ids[1:10], + node_ids_correct_top10 + ) -test_that("traversal works correctly", { expect_equal(node_ids[1:10], - node_ids_correct_top10)}) + expect_equal(as.character(tree$rule$value), c("blue", "green")) +}) -test_that("rule is correct", {expect_equal(as.character(tree$rule$value),c("blue","green"))}) -# more tests for stripped traversal -tree_stripped <- strip(tree) -traverse_stripped(df[1,],tree_stripped, what="parameters") -traverse_stripped(df[3,],tree_stripped, what="parameters") +# more tests for stripped traversal +# +# tree_stripped <- strip(tree) +# traverse_stripped(df[1,],tree_stripped, what="parameters") +# traverse_stripped(df[3,],tree_stripped, what="parameters") diff --git a/tests/testthat/test-vim.R b/tests/testthat/test-vim.R index e9bcb9f..c5901cf 100644 --- a/tests/testthat/test-vim.R +++ b/tests/testthat/test-vim.R @@ -1,3 +1,5 @@ +skip_on_cran() + imps <- c( 1:50, rep(c(10,20),each=25), diff --git a/tests/testthat/test-vim2.R b/tests/testthat/test-vim2.R index 0669b1e..0848f8c 100644 --- a/tests/testthat/test-vim2.R +++ b/tests/testthat/test-vim2.R @@ -1,3 +1,5 @@ +skip_on_cran() + set.seed(789) require("semtree") data(lgcm) diff --git a/tests/testthat/test.focus.R b/tests/testthat/test.focus.R index faa6c86..ffd5fc8 100644 --- a/tests/testthat/test.focus.R +++ b/tests/testthat/test.focus.R @@ -1,4 +1,7 @@ +skip_on_cran() + set.seed(123) + N <- 1000 grp1 <- ordered(sample(x = c(0,1), size=N, replace=TRUE)) grp2 <- ordered(sample(x = c(0,1), size=N, replace=TRUE)) diff --git a/tests/testthat/test_boruta.R b/tests/testthat/test_boruta.R index 57ea11e..9e5d981 100644 --- a/tests/testthat/test_boruta.R +++ b/tests/testthat/test_boruta.R @@ -1,3 +1,8 @@ + +# skip long running tests on CRAN +testthat::skip_on_cran() + + library("semtree") data(lgcm) @@ -70,7 +75,7 @@ model <- lgcModel data <- lgcm control <- semforest_score_control() -vim_boruta <- boruta(lgcModel, lgcm) +vim_boruta <- boruta(lgcModel, lgcm,percentile_threshold = 1) print(vim_boruta) diff --git a/tests/testthat/testthat.R b/tests/testthat/testthat.R index a344d33..7437fb0 100644 --- a/tests/testthat/testthat.R +++ b/tests/testthat/testthat.R @@ -1,2 +1,5 @@ +# skip long running tests on CRAN +skip_on_cran() + library(testthat) test_check("semtree") \ No newline at end of file diff --git a/tests/vim.R b/tests/vim.R index 38802f8..bf3aaca 100644 --- a/tests/vim.R +++ b/tests/vim.R @@ -4,7 +4,7 @@ data(lgcm) lgcm$agegroup <- ordered(lgcm$agegroup) lgcm$training <- factor(lgcm$training) -lgcm$noise <- as.numeric(lgcm$noise) +lgcm$noise <- factor(lgcm$noise) # LOAD IN OPENMX MODEL. # A SIMPLE LINEAR GROWTH MODEL WITH 5 TIME POINTS FROM SIMULATED DATA @@ -68,7 +68,10 @@ lgcModel <- mxModel("Linear Growth Curve Model Path Specification", ) -fr <- semforest(lgcModel, lgcm,control = semforest.control(num.trees = 3)) +fr <- semforest(lgcModel, + lgcm, + control = semforest.control(num.trees = 3, + control=semtree.control(method="score",alpha = 1))) vimp <- varimp(fr) diff --git a/vignettes/constraints.Rmd b/vignettes/constraints.Rmd index 4f7b566..d72e27a 100644 --- a/vignettes/constraints.Rmd +++ b/vignettes/constraints.Rmd @@ -29,7 +29,7 @@ library(MASS) `semtree` allows different constraints on the split evaluation (`global.invariance`, `local.invariance`, and `focus.parameters`). These can be set in the following object and then passed to the `semtree` command: -```{r eval=FALSE, message=FALSE, warning=FALSE} +```{r eval=FALSE, message=FALSE, warning=FALSE, results="hide"} library(semtree) cnst <- semtree.constraints(local.invariance=NULL, @@ -117,7 +117,7 @@ Even though we retrieve both predictors in the tree, the tree structure does not ## Local Invariance -Local invariance builds a tree under which all parameters across the leafs of a tree may differ but the chosen parameters may not differ significantly from each other. If they differed significantly, the respective split is not considered a valid split and is not chosen. Local constraints are implemented by means of an additional test of measurement invariance. For each possible split, we fit an additional null model, in which the locally invariant parameters are constrained to be equal across the two resulting daugther nodes of a split. Only if we reject this null hypothesis, we believe that there is measurement non-invariance and disregard the split. A typical use-case is to the set of loadings of a factor model as `local.invariance` to allow a tree with weakly measurement-invariant leafs. +Local invariance builds a tree under which all parameters across the leafs of a tree may differ but the chosen parameters may not differ significantly from each other. If they differed significantly, the respective split is not considered a valid split and is not chosen. Local constraints are implemented by means of an additional test of measurement invariance. For each possible split, we fit an additional null model, in which the locally invariant parameters are constrained to be equal across the two resulting daughter nodes of a split. Only if we reject this null hypothesis, we believe that there is measurement non-invariance and disregard the split. A typical use-case is to the set of loadings of a factor model as `local.invariance` to allow a tree with weakly measurement-invariant leafs. ```{r warning=FALSE, message=FALSE, error=FALSE, echo=TRUE} diff --git a/vignettes/forests.Rmd b/vignettes/forests.Rmd index 5b2e3c6..6a73c48 100644 --- a/vignettes/forests.Rmd +++ b/vignettes/forests.Rmd @@ -17,6 +17,10 @@ knitr::opts_chunk$set( library(semtree) ``` +This example demonstrates how SEM forests can be grown. SEM forests are ensembles of typically hundreds to thousands of SEM trees. Using permutation-based variable importance estimates, we can aggregate the importance of each predictor for improving model fit. + +Here, we use the `affect` dataset and a simple SEM with only a single observed variable and no latent variables. + ## Load data Load affect dataset from the `psychTools` package. These are data from two studies conducted in the Personality, Motivation and Cognition Laboratory at Northwestern University to study affect dimensionality and the relationship to various personality dimensions. @@ -62,16 +66,16 @@ summary(result) ## Forest -Create a forest control object that stores all tuning parameters of the forest. Note that we use only 5 trees for demo purposes. Please increase the number in real applications. +Create a forest control object that stores all tuning parameters of the forest. Note that we use only 5 trees for illustration. Please increase the number in real applications to several hundreds. To speed up computation time, consider score-based test for variable selection in the trees. ```{r} -control <- semforest.control(num.trees = 5) +control <- semforest_control(num.trees = 5) print(control) ``` Now, run the forest using the `control` object: -```{r message=FALSE, echo=TRUE, warning=FALSE} +```{r message=FALSE, echo=TRUE, warning=FALSE, results="hide"} forest <- semforest( model=model, data = affect, control = control, @@ -81,7 +85,7 @@ forest <- semforest( model=model, ## Variable importance -Next, we compute permutation-based variable importance. +Next, we compute permutation-based variable importance. This may take some time. ```{r} vim <- varimp(forest) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 9044663..e6b2f0f 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -16,6 +16,8 @@ knitr::opts_chunk$set( ## Load the Package +We first load the `semtree` package and the `OpenMx` package for specifying our SEM. + ```{r setup} library(semtree) library(OpenMx) @@ -23,6 +25,8 @@ library(OpenMx) ## Simulate data +Now, we simulate some data from a linear latent growth curve model (that is, a random intercept and random slope over time). The dataset will be called `growth.data`. The dataset contains five observations for each individual (`X1` to `X5`) and one predictor `P1`. The predictor is dichotomous and predicts a (quite large) difference in mean slope. + ```{r simdata} set.seed(23) N <- 1000 @@ -35,12 +39,14 @@ x <- (slope + p1 * 5) %*% t(loadings) + matrix(rep(icept, each = M), byrow = TRUE, ncol = M) + rnorm(N * M, sd = .08) -growth.data <- data.frame(x, p1) +growth.data <- data.frame(x, factor(p1)) names(growth.data) <- c(paste0("X", 1:M), "P1") ``` ## Specify an OpenMx model +Now, we specify a linear latent growth curve model using OpenMx's path specification. The model has five observed variables. Residual variances are assumed to be identical over time. + ```{r} manifests <- names(growth.data)[1:5] growthCurveModel <- mxModel("Linear Growth Curve Model Path Specification", @@ -99,16 +105,24 @@ growthCurveModel <- mxModel("Linear Growth Curve Model Path Specification", labels=c("meani", "means") ) ) # close model + +# fit the model to the entire dataset +growthCurveModel <- mxRun(growthCurveModel) ``` ## Run a tree -```{r} -tree <- semtree(model = growthCurveModel, data = growth.data) +Now, we grow a SEM tree using the `semtree` function, which takes the model and the dataset as input. If not specified otherwise, SEM tree will assume that all variables in the dataset, which are not observed variables in the dataset are potential predictors. + +```{r message=FALSE,warning=FALSE,results="hide"} +tree <- semtree(model = growthCurveModel, + data = growth.data) ``` ## Plotting +Once the tree is grown, we can plot it: + ```{r} plot(tree) ``` \ No newline at end of file diff --git a/vignettes/score-based-tests.Rmd b/vignettes/score-based-tests.Rmd index b349843..9a53fe7 100644 --- a/vignettes/score-based-tests.Rmd +++ b/vignettes/score-based-tests.Rmd @@ -1,5 +1,5 @@ --- -title: "Score-based Tests" +title: "SEM Trees with score-based tests" author: "Andreas M. Brandmaier" date: "`r Sys.Date()`" output: rmarkdown::html_vignette @@ -16,7 +16,9 @@ knitr::opts_chunk$set( ) ``` +In this example, we will explore how score-based SEM trees can be used. Score-based tests for variable and split-point selection are preferable because they are fast to compute, perform unbiased variable selection, and have better statistical power than some other selection algorithms proposed earlier. +For this illustration, we will use the `affect` dataset and a simple (non-latent) SEM, which just has one observed variable. Using such a simple model is similar to using a conventional decision tree or random forest; however, the SEM tree version not only finds differences in the mean prediction across leafs but also in the variances (ie., individual differences) of the criterion variable. ## Load data @@ -68,7 +70,7 @@ ctrl = semtree.control( bonferroni = TRUE) ``` -```{r message=FALSE, warning=FALSE} +```{r message=FALSE, warning=FALSE, results="hide"} tree = semtree( model = result, data = tree.data, control=ctrl) @@ -89,16 +91,16 @@ cols <- viridis::plasma(nrow(tndata)) pl <- ggplot2::ggplot(data = data.frame(x = c(-20, 20)), ggplot2::aes(x))+ ggplot2::xlab("Change in Positive Affect") + for (i in 1:nrow(tndata)) { pl <- pl + ggplot2::stat_function(fun = dnorm, - n = 101, col=cols[i], args = list(mean = tndata[i,2], - sd = sqrt(tndata[i,1]))) + n = 101, col=cols[i], args = list(mean = tndata[i,2], sd = sqrt(tndata[i,1]))) } plot(pl) ``` -Let's inspect the group with the largest negative change: +Let us inspect the group with the largest negative change. This is the group of participants, who started out with a high positive affect and then had to watch a war movie. ```{r dpi=300, out.width="100%"} i <- which.min(tndata$mu) diff --git a/vignettes/semforest-focus.Rmd b/vignettes/semforest-focus.Rmd index e17761f..9f7fa6a 100644 --- a/vignettes/semforest-focus.Rmd +++ b/vignettes/semforest-focus.Rmd @@ -45,7 +45,7 @@ names(df.biv)[1:2] <- paste0("x",1:2) manifests<-c("x1","x2") ``` -The following code specifices a bivariate Gaussian model with five parameters: +The following code specifies a bivariate Gaussian model with five parameters: ```{r} model.biv <- mxModel("Bivariate_Model", @@ -78,12 +78,14 @@ df.biv.pred <- data.frame(df.biv, Now, we choose the mean of the second dimension `mu2` as focus parameter. We expect that only predictor `grp2`. This is what we see in a single tree. -```{r message=FALSE,eval=TRUE} +```{r message=FALSE,eval=TRUE, results="hide"} fp <- "mu2" # predicted by grp2 #fp <- "mu1" # predicted by grp1 tree.biv <- semtree(model.biv, data=df.biv, constraints = list(focus.parameters=fp)) +``` +```{r} plot(tree.biv) ``` @@ -91,7 +93,7 @@ plot(tree.biv) Now, we are repeating the same analysis in a forest. -```{r message=FALSE, warning=FALSE} +```{r message=FALSE, warning=FALSE,results="hide"} forest <- semforest(model.biv, data=df.biv, constraints = list(focus.parameters=fp), control=semforest.control(num.trees=10, control=semtree.control(method="score",alpha=1)))