diff --git a/DESCRIPTION b/DESCRIPTION index 7f7a22354..dd31fe232 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.28.2.3 +Version: 0.28.2.4 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -148,6 +148,7 @@ Suggests: ivreg, knitr, lavaan, + lcmm, lfe, lm.beta, lme4, diff --git a/NAMESPACE b/NAMESPACE index ca1b69d08..159573895 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -243,6 +243,8 @@ S3method(model_parameters,draws) S3method(model_parameters,emmGrid) S3method(model_parameters,emm_list) S3method(model_parameters,epi.2by2) +S3method(model_parameters,externVar) +S3method(model_parameters,externX) S3method(model_parameters,estimate_contrasts) S3method(model_parameters,estimate_means) S3method(model_parameters,estimate_slopes) @@ -275,6 +277,7 @@ S3method(model_parameters,ivFixed) S3method(model_parameters,ivprobit) S3method(model_parameters,kmeans) S3method(model_parameters,lavaan) +S3method(model_parameters,lcmm) S3method(model_parameters,list) S3method(model_parameters,lm_robust) S3method(model_parameters,lme) @@ -457,6 +460,8 @@ S3method(p_value,draws) S3method(p_value,eglm) S3method(p_value,emmGrid) S3method(p_value,emm_list) +S3method(p_value,externVar) +S3method(p_value,externX) S3method(p_value,feglm) S3method(p_value,fixest_multi) S3method(p_value,flac) @@ -482,6 +487,7 @@ S3method(p_value,ivFixed) S3method(p_value,ivprobit) S3method(p_value,ivreg) S3method(p_value,lavaan) +S3method(p_value,lcmm) S3method(p_value,list) S3method(p_value,lm) S3method(p_value,lm_robust) @@ -797,6 +803,8 @@ S3method(standard_error,draws) S3method(standard_error,effectsize_table) S3method(standard_error,emmGrid) S3method(standard_error,emm_list) +S3method(standard_error,externVar) +S3method(standard_error,externX) S3method(standard_error,estimate_contrasts) S3method(standard_error,estimate_means) S3method(standard_error,estimate_slopes) @@ -829,6 +837,7 @@ 
S3method(standard_error,ivFixed) S3method(standard_error,ivprobit) S3method(standard_error,ivreg) S3method(standard_error,lavaan) +S3method(standard_error,lcmm) S3method(standard_error,list) S3method(standard_error,lm_robust) S3method(standard_error,lme) diff --git a/NEWS.md b/NEWS.md index 266a10cba..6e74de38f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,9 @@ -# parameters 0.28.2.1 +# parameters (devel) * `equivalence_test()` gets methods for objects from the *modelbased* package. +* Added support for package *lcmm*. + # parameters 0.28.2 ## Bug fixes diff --git a/R/ci_generic.R b/R/ci_generic.R index 89365a2a0..9a9341df0 100644 --- a/R/ci_generic.R +++ b/R/ci_generic.R @@ -64,17 +64,19 @@ #' @keywords internal -.ci_dof <- function(model, - ci, - dof, - effects, - component, - method = "wald", - se = NULL, - vcov = NULL, - vcov_args = NULL, - verbose = TRUE, - ...) { +.ci_dof <- function( + model, + ci, + dof, + effects, + component, + method = "wald", + se = NULL, + vcov = NULL, + vcov_args = NULL, + verbose = TRUE, + ... +) { # need parameters to calculate the CIs if (inherits(model, "emmGrid")) { params <- insight::get_parameters( @@ -84,7 +86,8 @@ merge_parameters = TRUE ) } else { - params <- insight::get_parameters(model, + params <- insight::get_parameters( + model, effects = effects, component = component, verbose = FALSE @@ -110,7 +113,8 @@ if (is.null(se)) { if (!is.null(vcov) || isTRUE(list(...)[["robust"]])) { # robust (HC) standard errors? - stderror <- standard_error(model, + stderror <- standard_error( + model, component = component, vcov = vcov, vcov_args = vcov_args, @@ -119,7 +123,8 @@ ) } else { # normal standard errors, including small-sample approximations - stderror <- switch(method, + stderror <- switch( + method, kenward = se_kenward(model), kr = se_kenward(model), satterthwaite = se_satterthwaite(model), @@ -134,9 +139,11 @@ # filter non-matching parameters, resp. 
sort stderror and parameters, # so both have the identical order of values - if (nrow(stderror) != nrow(params) || - !all(stderror$Parameter %in% params$Parameter) || - !all(order(stderror$Parameter) == order(params$Parameter))) { + if ( + nrow(stderror) != nrow(params) || + !all(stderror$Parameter %in% params$Parameter) || + !all(order(stderror$Parameter) == order(params$Parameter)) + ) { params <- stderror <- merge(stderror, params, sort = FALSE) } @@ -164,19 +171,25 @@ alpha <- (1 + ci) / 2 fac <- suppressWarnings(stats::qt(alpha, df = dof)) - out <- cbind( - CI_low = params$Estimate - se * fac, - CI_high = params$Estimate + se * fac - ) + out <- cbind(CI_low = params$Estimate - se * fac, CI_high = params$Estimate + se * fac) out <- as.data.frame(out) out$CI <- ci out$Parameter <- params$Parameter out <- out[c("Parameter", "CI", "CI_low", "CI_high")] - if ("Component" %in% names(params)) out$Component <- params$Component - if ("Effects" %in% names(params) && effects != "fixed") out$Effects <- params$Effects - if ("Response" %in% names(params)) out$Response <- params$Response + if ("Component" %in% names(params)) { + out$Component <- params$Component + } + if ("Effects" %in% names(params) && effects != "fixed") { + out$Effects <- params$Effects + } + if ("Response" %in% names(params)) { + out$Response <- params$Response + } + if ("Group" %in% names(params) && inherits(model, c("lcmm", "externX", "externVar"))) { + out$Group <- params$Group + } # for cox-panel models, we have non-linear parameters with NA coefficient, # but test statistic and p-value - don't check for NA estimates in this case diff --git a/R/extract_parameters.R b/R/extract_parameters.R index 329d504ef..347203320 100644 --- a/R/extract_parameters.R +++ b/R/extract_parameters.R @@ -1,25 +1,26 @@ # generic function ------------------------------------------------------ - #' @keywords internal -.extract_parameters_generic <- function(model, - ci, - component, - merge_by = c("Parameter", "Component"), 
- standardize = NULL, - effects = "fixed", - ci_method = NULL, - p_adjust = NULL, - wb_component = FALSE, - verbose = TRUE, - keep_component_column = FALSE, - keep_parameters = NULL, - drop_parameters = NULL, - include_sigma = TRUE, - include_info = FALSE, - vcov = NULL, - vcov_args = NULL, - ...) { +.extract_parameters_generic <- function( + model, + ci, + component, + merge_by = c("Parameter", "Component"), + standardize = NULL, + effects = "fixed", + ci_method = NULL, + p_adjust = NULL, + wb_component = FALSE, + verbose = TRUE, + keep_component_column = FALSE, + keep_parameters = NULL, + drop_parameters = NULL, + include_sigma = TRUE, + include_info = FALSE, + vcov = NULL, + vcov_args = NULL, + ... +) { dots <- list(...) # ==== check if standardization is required and package available @@ -39,7 +40,6 @@ merge_by <- c("Parameter", "Component") } - # ==== for refit, we completely refit the model, than extract parameters, ci etc. as usual if (isTRUE(standardize == "refit")) { @@ -50,7 +50,8 @@ model <- do.call(fun, fun_args) } - parameters <- insight::get_parameters(model, + parameters <- insight::get_parameters( + model, effects = effects, component = component, verbose = FALSE @@ -60,13 +61,15 @@ # check if all estimates are non-NA parameters <- .check_rank_deficiency(model, parameters) - # ==== check if we really have a component column if (!("Component" %in% names(parameters)) && "Component" %in% merge_by) { merge_by <- setdiff(merge_by, "Component") } + if (!("Group" %in% names(parameters)) && "Group" %in% merge_by) { + merge_by <- setdiff(merge_by, "Group") + } # ==== check Degrees of freedom @@ -74,7 +77,6 @@ ci_method <- NULL } - # ==== for ordinal models, first, clean parameter names and then indicate # intercepts (alpha-coefficients) in the component column @@ -103,13 +105,13 @@ # column name for coefficients, non-standardized coef_col <- "Coefficient" - # ==== CI - only if we don't already have CI for std. 
parameters ci_cols <- NULL if (!is.null(ci)) { # set up arguments for CI function - fun_args <- list(model, + fun_args <- list( + model, ci = ci, component = component, vcov = vcov, @@ -134,10 +136,10 @@ } } - # ==== p value - fun_args <- list(model, + fun_args <- list( + model, method = ci_method, effects = effects, verbose = verbose, @@ -152,11 +154,11 @@ parameters <- merge(parameters, pval, by = merge_by, sort = FALSE) } - # ==== standard error - only if we don't already have SE for std. parameters std_err <- NULL - fun_args <- list(model, + fun_args <- list( + model, effects = effects, component = component, verbose = verbose, @@ -173,7 +175,6 @@ parameters <- merge(parameters, std_err, by = merge_by, sort = FALSE) } - # ==== test statistic - fix values for robust vcov if (!is.null(vcov)) { @@ -182,7 +183,6 @@ parameters <- merge(parameters, statistic, by = merge_by, sort = FALSE) } - # ==== degrees of freedom if (is.null(ci_method)) { @@ -200,12 +200,10 @@ } } - # ==== Rematch order after merging parameters <- parameters[match(original_order, parameters$.id), ] - # ==== Renaming if ("Statistic" %in% colnames(parameters)) { @@ -223,7 +221,6 @@ colnames(parameters) <- gsub("(c|C)hisq", "Chi2", colnames(parameters)) colnames(parameters) <- gsub("Estimate", "Coefficient", colnames(parameters), fixed = TRUE) - # ==== add intercept groups for ordinal models if (inherits(model, "polr") && !is.null(intercept_groups)) { @@ -233,43 +230,49 @@ parameters$Component <- intercept_groups } - # ==== remove Component / Effects column if not needed - if (!is.null(parameters$Component) && insight::has_single_value(parameters$Component, remove_na = TRUE) && !keep_component_column) parameters$Component <- NULL # nolint - if ((!is.null(parameters$Effects) && insight::n_unique(parameters$Effects) == 1) || effects == "fixed") parameters$Effects <- NULL # nolint - + if ( + !is.null(parameters$Component) && + insight::has_single_value(parameters$Component, remove_na = TRUE) && + 
!keep_component_column + ) { + parameters$Component <- NULL + } + if ( + (!is.null(parameters$Effects) && insight::n_unique(parameters$Effects) == 1) || + effects == "fixed" + ) { + parameters$Effects <- NULL + } # ==== filter parameters, if requested if (!is.null(keep_parameters) || !is.null(drop_parameters)) { - parameters <- .filter_parameters(parameters, + parameters <- .filter_parameters( + parameters, keep = keep_parameters, drop = drop_parameters, verbose = verbose ) } - # ==== adjust p-values? if (!is.null(p_adjust)) { parameters <- .p_adjust(parameters, p_adjust, model, verbose) } - # ==== remove all complete-missing cases parameters <- parameters[apply(parameters, 1, function(i) !all(is.na(i))), ] - # ==== add within/between attributes if (inherits(model, c("glmmTMB", "MixMod")) && isTRUE(wb_component)) { parameters <- .add_within_between_effects(model, parameters) } - # ==== Std Coefficients for other methods than "refit" if (!is.null(standardize) && !isTRUE(standardize == "refit")) { @@ -291,9 +294,9 @@ coef_col <- "Std_Coefficient" } - # ==== Reorder + # fmt: skip col_order <- c( "Parameter", coef_col, "SE", ci_cols, "t", "z", "t / F", "t/F", "z / Chisq", "z/Chisq", "z / Chi2", "z/Chi2", "F", "Chi2", @@ -302,14 +305,12 @@ ) parameters <- parameters[col_order[col_order %in% names(parameters)]] - # ==== add sigma and residual df if (isTRUE(include_sigma) || isTRUE(include_info)) { parameters <- .add_sigma_residual_df(parameters, model) } - rownames(parameters) <- NULL parameters } diff --git a/R/methods_hclust.R b/R/methods_hclust.R index f25eb916b..5b3fc4879 100644 --- a/R/methods_hclust.R +++ b/R/methods_hclust.R @@ -8,7 +8,7 @@ #' rows in data). #' @param ... Arguments passed to or from other methods. 
#' -#' @examplesIf require("factoextra", quietly = TRUE) && require("dbscan", quietly = TRUE) && require("cluster", quietly = TRUE) && require("fpc", quietly = TRUE) +#' @examplesIf all(insight::check_if_installed(c("dbscan", "cluster", "fpc"), quietly = TRUE)) #' \donttest{ #' # #' # K-means ------------------------------- @@ -46,23 +46,6 @@ #' attributes(rez)$Between_Sum_Squares #' #' # -#' # Hierarchical K-means (factoextra::hkclust) ---------------------- -#' data <- iris[1:4] -#' model <- factoextra::hkmeans(data, k = 3) -#' -#' rez <- model_parameters(model) -#' rez -#' -#' # Get clusters -#' predict(rez) -#' -#' # Clusters centers in long form -#' attributes(rez)$means -#' -#' # Between and Total Sum of Squares -#' attributes(rez)$Sum_Squares_Total -#' attributes(rez)$Sum_Squares_Between -#' #' # K-Medoids (PAM and HPAM) ============== #' model <- cluster::pam(iris[1:4], k = 3) #' model_parameters(model) diff --git a/R/methods_lcmm.R b/R/methods_lcmm.R new file mode 100644 index 000000000..f4737b665 --- /dev/null +++ b/R/methods_lcmm.R @@ -0,0 +1,126 @@ +#' @export +model_parameters.lcmm <- function( + model, + ci = 0.95, + ci_method = "residual", + component = "all", + p_adjust = NULL, + keep = NULL, + drop = NULL, + verbose = TRUE, + ... +) { + out <- .model_parameters_generic( + model = model, + effects = "all", + component = component, + ci = ci, + ci_method = ci_method, + merge_by = c("Parameter", "Component", "Group"), + p_adjust = p_adjust, + keep_parameters = keep, + drop_parameters = drop, + include_info = FALSE, + verbose = verbose, + ... + ) + + attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(model)) + out +} + +#' @export +model_parameters.externVar <- function( + model, + ci = 0.95, + ci_method = "residual", + component = "all", + p_adjust = NULL, + keep = NULL, + drop = NULL, + verbose = TRUE, + ... 
+) { + out <- .model_parameters_generic( + model = model, + effects = "all", + component = component, + ci = ci, + ci_method = ci_method, + merge_by = c("Parameter", "Group"), + p_adjust = p_adjust, + keep_parameters = keep, + drop_parameters = drop, + include_info = FALSE, + verbose = verbose, + ... + ) + + attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(model)) + out +} + +#' @export +model_parameters.externX <- model_parameters.externVar + + +# p-values ---------------------------------------------------------------- + +# Helper to format output for lcmm methods +.format_lcmm_output <- function(model, value, colname, component, ...) { + component <- insight::validate_argument( + component, + c("all", "conditional", "membership", "longitudinal", "beta", "splines", "linear") + ) + + out <- insight::get_parameters(model, component = "all", ...) + out[[colname]] <- as.vector(value) + + if (component != "all") { + out <- out[out$Component == component, , drop = FALSE] + } + + # clean up + out$Estimate <- NULL + out <- out[intersect(c("Parameter", colname, "Component", "Group"), colnames(out))] + + insight::text_remove_backticks(out, verbose = FALSE) +} + + +#' @export +p_value.lcmm <- function(model, component = "all", ...) { + id <- seq_along(model$best) + indice <- id * (id + 1) / 2 + se <- sqrt(model$V[indice]) + statistic <- model$best / se + p <- 2 * stats::pt(abs(statistic), df = Inf, lower.tail = FALSE) + + p <- p[!startsWith(names(model$best), "cholesky ") & !startsWith(names(model$best), "varcov ")] + .format_lcmm_output(model, p, "p", component, ...) +} + +#' @export +p_value.externX <- p_value.lcmm + +#' @export +p_value.externVar <- p_value.lcmm + + +# standard errors ------------------------------------------------------------- + +#' @export +standard_error.lcmm <- function(model, component = "all", ...)
{ + id <- seq_along(model$best) + indice <- id * (id + 1) / 2 + se <- sqrt(model$V[indice]) + + se <- se[!startsWith(names(model$best), "cholesky ") & !startsWith(names(model$best), "varcov ")] + .format_lcmm_output(model, se, "SE", component, ...) +} + +#' @export +standard_error.externX <- standard_error.lcmm + +#' @export +standard_error.externVar <- standard_error.lcmm diff --git a/R/n_clusters_easystats.R b/R/n_clusters_easystats.R index 204d3eb00..dee0b4544 100644 --- a/R/n_clusters_easystats.R +++ b/R/n_clusters_easystats.R @@ -4,15 +4,18 @@ #' x <- n_clusters_elbow(iris[1:4]) #' x #' as.data.frame(x) -#' plot(x) +#' # plotting is also possible: +#' # plot(x) #' } #' @export -n_clusters_elbow <- function(x, - standardize = TRUE, - include_factors = FALSE, - clustering_function = stats::kmeans, - n_max = 10, - ...) { +n_clusters_elbow <- function( + x, + standardize = TRUE, + include_factors = FALSE, + clustering_function = stats::kmeans, + n_max = 10, + ... +) { t0 <- Sys.time() out <- .n_clusters_factoextra( x, @@ -93,7 +96,8 @@ n_clusters_gap <- function(x, #' x <- n_clusters_silhouette(iris[1:4]) #' x #' as.data.frame(x) -#' plot(x) +#' # plotting is also possible: +#' # plot(x) #' } #' } #' @export diff --git a/man/model_parameters.hclust.Rd b/man/model_parameters.hclust.Rd index 2eaaae075..c98208852 100644 --- a/man/model_parameters.hclust.Rd +++ b/man/model_parameters.hclust.Rd @@ -20,7 +20,7 @@ rows in data).} Format cluster models obtained for example by \code{\link[=kmeans]{kmeans()}}.
} \examples{ -\dontshow{if (require("factoextra", quietly = TRUE) && require("dbscan", quietly = TRUE) && require("cluster", quietly = TRUE) && require("fpc", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("dbscan", "cluster", "fpc"), quietly = TRUE))) withAutoprint(\{ # examplesIf} \donttest{ # # K-means ------------------------------- @@ -58,23 +58,6 @@ attributes(rez)$Total_Sum_Squares attributes(rez)$Between_Sum_Squares # -# Hierarchical K-means (factoextra::hkclust) ---------------------- -data <- iris[1:4] -model <- factoextra::hkmeans(data, k = 3) - -rez <- model_parameters(model) -rez - -# Get clusters -predict(rez) - -# Clusters centers in long form -attributes(rez)$means - -# Between and Total Sum of Squares -attributes(rez)$Sum_Squares_Total -attributes(rez)$Sum_Squares_Between - # K-Medoids (PAM and HPAM) ============== model <- cluster::pam(iris[1:4], k = 3) model_parameters(model) diff --git a/man/n_clusters.Rd b/man/n_clusters.Rd index bdf079a54..9886896fb 100644 --- a/man/n_clusters.Rd +++ b/man/n_clusters.Rd @@ -172,7 +172,8 @@ if (require("mclust", quietly = TRUE) && require("NbClust", quietly = TRUE) && x <- n_clusters_elbow(iris[1:4]) x as.data.frame(x) -plot(x) +# plotting is also possible: +# plot(x) } \dontshow{\}) # examplesIf} \donttest{ @@ -194,7 +195,8 @@ if (require("factoextra", quietly = TRUE)) { x <- n_clusters_silhouette(iris[1:4]) x as.data.frame(x) - plot(x) + # plotting is also possible: + # plot(x) } } \donttest{ diff --git a/tests/testthat/_snaps/lcmm.md b/tests/testthat/_snaps/lcmm.md new file mode 100644 index 000000000..d74cb291d --- /dev/null +++ b/tests/testthat/_snaps/lcmm.md @@ -0,0 +1,257 @@ +# model_parameters lcmm + + Code + print(model_parameters(out$mx_linear), zap_small = TRUE) + Output + Parameter | Log-Odds | SE | 95% CI | z | p + --------------------------------------------------------------------------- + intercept class1 | 1.12 | 0.90 | [ -0.64, 2.89] | 1.25 | 
0.211 + intercept class2 | 1.19 | 0.74 | [ -0.26, 2.65] | 1.61 | 0.107 + X1 class1 | 1.85 | 13.74 | [ -25.08, 28.77] | 0.13 | 0.893 + X1 class2 | 4.12 | 12.32 | [ -20.03, 28.26] | 0.33 | 0.738 + X2 class1 | 10.50 | 2079.25 | [-4064.75, 4085.74] | 0.01 | 0.996 + X2 class2 | 9.64 | 2078.80 | [-4064.73, 4084.02] | 0.00 | 0.996 + X3 class1 | -0.60 | 0.90 | [ -2.36, 1.17] | -0.66 | 0.508 + X3 class2 | 0.12 | 0.55 | [ -0.95, 1.19] | 0.23 | 0.822 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. + + The model has a log- or logit-link. Consider using `exponentiate = + TRUE` to interpret coefficients as ratios. + + Some coefficients are very large, which may indicate issues with + complete separation. + +--- + + Code + print(model_parameters(out$mx_beta), zap_small = TRUE) + Output + Parameter | Log-Odds | SE | 95% CI | z | p + ----------------------------------------------------------------------- + intercept class1 | 0.12 | 67.86 | [-132.89, 133.13] | 0.00 | 0.999 + intercept class2 | 1.55 | 35.86 | [ -68.73, 71.83] | 0.04 | 0.966 + X1 class1 | 0.51 | 24.20 | [ -46.92, 47.94] | 0.02 | 0.983 + X1 class2 | -1.02 | 26.22 | [ -52.41, 50.36] | -0.04 | 0.969 + X2 class1 | 15.95 | 14.97 | [ -13.39, 45.28] | 1.07 | 0.287 + X2 class2 | 15.44 | 9.01 | [ -2.22, 33.09] | 1.71 | 0.087 + X3 class1 | 0.44 | 14.56 | [ -28.09, 28.97] | 0.03 | 0.976 + X3 class2 | -0.35 | 18.61 | [ -36.81, 36.12] | -0.02 | 0.985 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. 
+ +--- + + Code + print(model_parameters(out$mx_splines), zap_small = TRUE) + Output + Parameter | Log-Odds | SE | 95% CI | z | p + --------------------------------------------------------------------------- + intercept class1 | -1.46 | 0.83 | [ -3.09, 0.17] | -1.75 | 0.080 + intercept class2 | -1.53 | 0.83 | [ -3.16, 0.11] | -1.83 | 0.067 + X1 class1 | 1.58 | 0.71 | [ 0.18, 2.97] | 2.22 | 0.027 + X1 class2 | 0.86 | 1.83 | [ -2.73, 4.45] | 0.47 | 0.639 + X2 class1 | 0.47 | 0.98 | [ -1.44, 2.38] | 0.48 | 0.631 + X2 class2 | -21.31 | 1194.95 | [-2363.38, 2320.75] | -0.02 | 0.986 + X3 class1 | 0.80 | 0.38 | [ 0.05, 1.54] | 2.10 | 0.036 + X3 class2 | 0.30 | 0.63 | [ -0.95, 1.54] | 0.47 | 0.638 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. + +--- + + Code + print(model_parameters(out$m1_linear), zap_small = TRUE) + Output + # longitudinal + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------- + Time | -0.78 | 0.19 | [-1.15, -0.40] | -4.09 | < .001 + I(Time^2) | -0.18 | 0.08 | [-0.33, -0.04] | -2.44 | 0.015 + + # linear + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------------------- + Linear 1 (intercept) | 25.40 | 0.23 | [24.96, 25.85] | 112.34 | < .001 + Linear 2 (std err) | 2.24 | 0.04 | [ 2.15, 2.33] | 50.37 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. 
+ +--- + + Code + print(model_parameters(out$m1_beta), zap_small = TRUE) + Output + # longitudinal + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------- + Time | -0.88 | 0.19 | [-1.25, -0.51] | -4.62 | < .001 + I(Time^2) | -0.10 | 0.08 | [-0.25, 0.05] | -1.34 | 0.180 + + # beta + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------- + Beta1 | 0.54 | 0.07 | [ 0.41, 0.68] | 7.97 | < .001 + Beta2 | -0.76 | 0.09 | [-0.94, -0.58] | -8.36 | < .001 + Beta3 | 0.70 | 0.02 | [ 0.67, 0.74] | 42.20 | < .001 + Beta4 | 0.09 | 0.00 | [ 0.08, 0.10] | 20.53 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. + +--- + + Code + print(model_parameters(out$m1_splines), zap_small = TRUE) + Output + # longitudinal + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------- + Time | -0.89 | 0.19 | [-1.26, -0.52] | -4.68 | < .001 + I(Time^2) | -0.10 | 0.08 | [-0.25, 0.05] | -1.28 | 0.200 + + # splines + + Parameter | Coefficient | SE | 95% CI | z | p + ------------------------------------------------------------------ + I-splines1 | -7.76 | 0.59 | [-8.92, -6.59] | -13.04 | < .001 + I-splines2 | 0.76 | 0.28 | [ 0.21, 1.31] | 2.71 | 0.007 + I-splines3 | 0.81 | 0.41 | [ 0.00, 1.63] | 1.96 | 0.049 + I-splines4 | 1.44 | 0.13 | [ 1.18, 1.70] | 10.85 | < .001 + I-splines5 | -1.67 | 0.06 | [-1.80, -1.54] | -25.86 | < .001 + I-splines6 | 1.62 | 0.05 | [ 1.52, 1.72] | 32.56 | < .001 + I-splines7 | 1.30 | 0.05 | [ 1.20, 1.40] | 25.33 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. 
+ +--- + + Code + print(model_parameters(out$m2_linear), zap_small = TRUE) + Output + # membership + + Parameter | Coefficient | SE | 95% CI | z | p + -------------------------------------------------------------------- + intercept class1 | 1.69 | 0.95 | [-0.18, 3.56] | 1.77 | 0.076 + intercept class2 | 2.47 | 0.79 | [ 0.91, 4.02] | 3.11 | 0.002 + + # longitudinal + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------------- + intercept class2 | 0.68 | 0.33 | [ 0.03, 1.32] | 2.06 | 0.039 + intercept class3 | -0.63 | 0.68 | [-1.97, 0.70] | -0.93 | 0.353 + Time class1 | -1.12 | 0.56 | [-2.21, -0.03] | -2.01 | 0.045 + Time class2 | -0.37 | 0.25 | [-0.86, 0.13] | -1.44 | 0.151 + Time class3 | -2.90 | 1.48 | [-5.80, 0.00] | -1.96 | 0.050 + I(Time^2) class1 | -0.52 | 0.24 | [-0.98, -0.06] | -2.21 | 0.027 + I(Time^2) class2 | -0.15 | 0.09 | [-0.34, 0.03] | -1.67 | 0.094 + I(Time^2) class3 | 0.79 | 0.70 | [-0.59, 2.18] | 1.13 | 0.260 + + # linear + + Parameter | Coefficient | SE | 95% CI | z | p + --------------------------------------------------------------------------- + Linear 1 (intercept) | 24.46 | 0.60 | [23.28, 25.64] | 40.56 | < .001 + Linear 2 (std err) | 2.23 | 0.04 | [ 2.14, 2.32] | 50.22 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. 
+ +--- + + Code + print(model_parameters(out$m2_beta), zap_small = TRUE) + Output + # membership + + Parameter | Coefficient | SE | 95% CI | z | p + -------------------------------------------------------------------- + intercept class1 | 0.93 | 1.48 | [-1.97, 3.83] | 0.63 | 0.531 + intercept class2 | 1.36 | 1.10 | [-0.79, 3.51] | 1.24 | 0.214 + + # longitudinal + + Parameter | Coefficient | SE | 95% CI | z | p + ----------------------------------------------------------------------- + intercept class2 | -0.54 | 0.44 | [-1.41, 0.33] | -1.22 | 0.221 + intercept class3 | -0.99 | 0.64 | [-2.25, 0.27] | -1.54 | 0.123 + Time class1 | -0.52 | 0.67 | [-1.83, 0.79] | -0.77 | 0.438 + Time class2 | -1.68 | 0.33 | [-2.32, -1.04] | -5.14 | < .001 + Time class3 | 1.61 | 1.09 | [-0.52, 3.74] | 1.48 | 0.138 + I(Time^2) class1 | 0.02 | 0.23 | [-0.43, 0.47] | 0.07 | 0.943 + I(Time^2) class2 | -0.07 | 0.13 | [-0.33, 0.20] | -0.50 | 0.618 + I(Time^2) class3 | -0.69 | 0.39 | [-1.46, 0.09] | -1.74 | 0.082 + + # beta + + Parameter | Coefficient | SE | 95% CI | z | p + ---------------------------------------------------------------- + Beta1 | 0.60 | 0.06 | [ 0.48, 0.73] | 9.34 | < .001 + Beta2 | -0.83 | 0.09 | [-1.01, -0.66] | -9.39 | < .001 + Beta3 | 0.73 | 0.03 | [ 0.67, 0.80] | 22.27 | < .001 + Beta4 | 0.09 | 0.00 | [ 0.08, 0.10] | 21.88 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. 
+ +--- + + Code + print(model_parameters(out$m2_splines), zap_small = TRUE) + Output + # membership + + Parameter | Coefficient | SE | 95% CI | z | p + --------------------------------------------------------------------- + intercept class1 | -0.46 | 0.45 | [-1.33, 0.42] | -1.01 | 0.310 + intercept class2 | -1.40 | 1.06 | [-3.48, 0.68] | -1.32 | 0.186 + + # longitudinal + + Parameter | Coefficient | SE | 95% CI | z | p + ----------------------------------------------------------------------- + intercept class2 | -1.03 | 0.67 | [-2.34, 0.27] | -1.55 | 0.121 + intercept class3 | -0.55 | 0.41 | [-1.36, 0.26] | -1.34 | 0.182 + Time class1 | -0.48 | 0.59 | [-1.63, 0.67] | -0.81 | 0.415 + Time class2 | 1.65 | 1.10 | [-0.50, 3.80] | 1.50 | 0.133 + Time class3 | -1.72 | 0.32 | [-2.35, -1.09] | -5.36 | < .001 + I(Time^2) class1 | 0.01 | 0.20 | [-0.38, 0.40] | 0.03 | 0.972 + I(Time^2) class2 | -0.70 | 0.40 | [-1.49, 0.08] | -1.76 | 0.078 + I(Time^2) class3 | -0.04 | 0.13 | [-0.30, 0.22] | -0.33 | 0.743 + + # splines + + Parameter | Coefficient | SE | 95% CI | z | p + ------------------------------------------------------------------ + I-splines1 | -7.87 | 0.62 | [-9.09, -6.66] | -12.67 | < .001 + I-splines2 | 0.72 | 0.27 | [ 0.20, 1.24] | 2.70 | 0.007 + I-splines3 | 0.77 | 0.41 | [-0.03, 1.57] | 1.88 | 0.060 + I-splines4 | 1.37 | 0.13 | [ 1.11, 1.63] | 10.35 | < .001 + I-splines5 | -1.66 | 0.06 | [-1.79, -1.54] | -25.77 | < .001 + I-splines6 | 1.64 | 0.05 | [ 1.54, 1.73] | 33.01 | < .001 + I-splines7 | 1.28 | 0.05 | [ 1.18, 1.39] | 24.94 | < .001 + Message + + Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed + using a Wald z-distribution approximation. 
+ diff --git a/tests/testthat/test-lcmm.R b/tests/testthat/test-lcmm.R new file mode 100644 index 000000000..c29eae255 --- /dev/null +++ b/tests/testthat/test-lcmm.R @@ -0,0 +1,34 @@ +skip_on_cran() +skip_on_os(c("mac", "linux", "solaris")) + +skip_if_not_installed("lcmm") +skip_if_not_installed("datawizard") +skip_if_not_installed("curl") +skip_if_offline() +skip_if_not_installed("httr2") +skip_if_not_installed("withr") + +withr::with_options( + list(parameters_warning_exponentiate = TRUE), + test_that("model_parameters lcmm", { + out <- tryCatch( + datawizard::data_read( + "https://github.com/easystats/circus/raw/refs/heads/main/data/lcmm.rda" + ), + error = function(e) NULL + ) + skip_if(is.null(out)) + + expect_snapshot(print(model_parameters(out$mx_linear), zap_small = TRUE)) + expect_snapshot(print(model_parameters(out$mx_beta), zap_small = TRUE)) + expect_snapshot(print(model_parameters(out$mx_splines), zap_small = TRUE)) + + expect_snapshot(print(model_parameters(out$m1_linear), zap_small = TRUE)) + expect_snapshot(print(model_parameters(out$m1_beta), zap_small = TRUE)) + expect_snapshot(print(model_parameters(out$m1_splines), zap_small = TRUE)) + + expect_snapshot(print(model_parameters(out$m2_linear), zap_small = TRUE)) + expect_snapshot(print(model_parameters(out$m2_beta), zap_small = TRUE)) + expect_snapshot(print(model_parameters(out$m2_splines), zap_small = TRUE)) + }) +) diff --git a/vignettes/clustering.Rmd b/vignettes/clustering.Rmd index f6f46d8c7..4bf8cf224 100644 --- a/vignettes/clustering.Rmd +++ b/vignettes/clustering.Rmd @@ -1,12 +1,12 @@ --- title: "Clustering with easystats" -output: +output: rmarkdown::html_vignette: vignette: > %\VignetteIndexEntry{Clustering with easystats} \usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console --- @@ -64,7 +64,7 @@ Clustering traditionally refers to the identification of groups of observations There are many clustering algorithms (see 
[this for an overview](https://scikit-learn.org/stable/modules/clustering.html)), but they can grouped in two categories: **supervised** and **unsupervised** techniques. In **supervised** techniques, you have to explicitly specify [**how many clusters**](https://easystats.github.io/parameters/reference/n_clusters.html) you want to extract. **Unsupervised** techniques, on the other hand, will estimate this number as part of their algorithm. Note that there are no inherently superior and inferior clustering methods, each come with their sets of limitations and benefits. -As an example in the tutorial below, we will use the **iris** dataset, for which we know that there are 3 "real" clusters (the 3 Species of flowers). Let's first start with visualizing the 3 "real" clusters on a 2D space of the variables created through PCA. +As an example in the tutorial below, we will use the **iris** dataset, for which we know that there are 3 "real" clusters (the 3 Species of flowers). Let's first start with visualizing the 3 "real" clusters on a 2D space of the variables created through PCA. ```{r} @@ -161,14 +161,11 @@ Hierarchical K-Means, as its name suggest, is essentially a combination of K-Mea rez_hkmeans <- cluster_analysis(data, n = 3, method = "hkmeans") rez_hkmeans # Show results - -# Visualize -plot(rez_hkmeans) + theme_modern() # Visualize ``` ### K-Medoids (PAM) -Clustering around "medoids", instead of "centroid", is considered to be a more robust version of K-means. See `cluster::pam()` for more information. +Clustering around "medoids", instead of "centroid", is considered to be a more robust version of K-means. See `cluster::pam()` for more information. ```{r} rez_pam <- cluster_analysis(data, n = 3, method = "pam") @@ -203,7 +200,7 @@ plot(rez_hclust2) + theme_modern() # Visualize ### DBSCAN -Although the DBSCAN method is quite powerful to identify clusters, it is highly dependent on its parameters, namely, `eps` and the `min_size`. 
Regarding the latter, the minimum size of any cluster is set by default to `0.1` (i.e., 10\% of rows), which is appropriate to avoid having too small clusters. +Although the DBSCAN method is quite powerful to identify clusters, it is highly dependent on its parameters, namely, `eps` and the `min_size`. Regarding the latter, the minimum size of any cluster is set by default to `0.1` (i.e., 10\% of rows), which is appropriate to avoid having too small clusters. The "optimal" **eps** value can be estimated using the [`n_clusters_dbscan()`](https://easystats.github.io/parameters/reference/cluster_analysis.html) function: