r-causal
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions b/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 2 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/check_auc.R‎
Lines changed: 1 addition & 1 deletion b/‎R/check_auc.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/check_ess.R‎
Lines changed: 182 additions & 0 deletions b/‎R/check_ess.R‎
Lines changed: 182 additions & 0 deletions
diff --git a/‎R/compute_balance.R‎
Lines changed: 6 additions & 3 deletions b/‎R/compute_balance.R‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎R/compute_qq.R‎
Lines changed: 1 addition & 1 deletion b/‎R/compute_qq.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/ess.R‎
Lines changed: 3 additions & 2 deletions b/‎R/ess.R‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎R/geom_calibration.R‎
Lines changed: 14 additions & 2 deletions b/‎R/geom_calibration.R‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎R/geom_mirrored_density.R‎
Lines changed: 13 additions & 11 deletions b/‎R/geom_mirrored_density.R‎
Lines changed: 13 additions & 11 deletions
@@ -23,6 +23,7 @@ Imports:
     propensity (>= 0.0.0.9000),
     purrr,
     rlang,
+    scales,
     smd,
     tibble,
     tidyr,
 
@@ -19,6 +19,7 @@ export(bind_matches)
 export(check_auc)
 export(check_balance)
 export(check_calibration)
+export(check_ess)
 export(contains)
 export(ends_with)
 export(ess)
@@ -38,6 +39,7 @@ export(one_of)
 export(peek_vars)
 export(plot_balance)
 export(plot_calibration)
+export(plot_ess)
 export(plot_mirror_distributions)
 export(plot_qq)
 export(plot_roc_auc)
 
@@ -226,7 +226,7 @@ roc_curve <- function(
     }
 
     weights <- extract_weight_data(weights)
-    
+
     # Handle zero and negative weights
     if (any(weights <= 0, na.rm = TRUE)) {
       n_zero_neg <- sum(weights <= 0, na.rm = TRUE)
 
@@ -0,0 +1,182 @@
+#' Check Effective Sample Size
+#'
+#' Computes the effective sample size (ESS) for one or more weighting schemes,
+#' optionally stratified by treatment groups. ESS reflects how many observations
+#' you would have if all were equally weighted.
+#'
+#' @details
+#' The effective sample size (ESS) is calculated using the classical formula:
+#' \eqn{ESS = (\sum w)^2 / \sum(w^2)}.
+#'
+#' When weights vary substantially, the ESS can be much smaller than the actual
+#' number of observations, indicating that a few observations carry
+#' disproportionately large weights.
+#'
+#' When `.group` is provided, ESS is calculated separately for each group level:
+#' - For binary/categorical exposures: ESS is computed within each treatment level
+#' - For continuous exposures: The variable is divided into quantiles (using
+#'   `dplyr::ntile()`) and ESS is computed within each quantile
+#'
+#' The function returns results in a tidy format suitable for plotting or
+#' further analysis.
+#'
+#' @inheritParams check_params
+#' @param .group Optional grouping variable. When provided, ESS is calculated
+#'   separately for each group level. Numeric variables with more than 10
+#'   unique non-missing values are treated as continuous and are grouped into
+#'   quantiles; all other variables are used as-is.
+#' @param n_tiles For continuous `.group` variables, the number of quantile
+#'   groups to create. Default is 4 (quartiles).
+#' @param tile_labels Optional character vector of labels for the quantile groups
+#'   when `.group` is continuous. Must have length `n_tiles`. If NULL, uses
+#'   "Q1", "Q2", etc.
+#'
+#' @return A tibble with columns:
+#'   \item{method}{Character. The weighting method ("observed" or weight variable name).}
+#'   \item{group}{The group level (only if `.group` is provided). Quantile
+#'     groups are a factor; other grouping variables keep their original type.}
+#'   \item{n}{Integer. The number of observations in the group.}
+#'   \item{ess}{Numeric. The effective sample size.}
+#'   \item{ess_pct}{Numeric. ESS as a percentage of the actual sample size.}
+#'
+#' @family balance functions
+#' @seealso [ess()] for the underlying ESS calculation, [plot_ess()] for visualization
+#'
+#' @examples
+#' # Overall ESS for different weighting schemes
+#' check_ess(nhefs_weights, .wts = c(w_ate, w_att, w_atm))
+#'
+#' # ESS by treatment group (binary exposure)
+#' check_ess(nhefs_weights, .wts = c(w_ate, w_att), .group = qsmk)
+#'
+#' # ESS by treatment group (categorical exposure)
+#' check_ess(nhefs_weights, .wts = w_cat_ate, .group = alcoholfreq_cat)
+#'
+#' # ESS by quartiles of a continuous variable
+#' check_ess(nhefs_weights, .wts = w_ate, .group = age, n_tiles = 4)
+#'
+#' # Custom labels for continuous groups
+#' check_ess(nhefs_weights, .wts = w_ate, .group = age,
+#'           n_tiles = 3, tile_labels = c("Young", "Middle", "Older"))
+#'
+#' # Without unweighted comparison
+#' check_ess(nhefs_weights, .wts = w_ate, .group = qsmk,
+#'           include_observed = FALSE)
+#'
+#' @export
+check_ess <- function(
+  .data,
+  .wts = NULL,
+  .group = NULL,
+  include_observed = TRUE,
+  n_tiles = 4,
+  tile_labels = NULL
+) {
+  # Validate inputs: .data is checked before any column is accessed
+  validate_data_frame(.data)
+
+  # Handle group variable: capture .group unevaluated so a NULL (no grouping) can be detected
+  group_quo <- rlang::enquo(.group)
+  has_group <- !rlang::quo_is_null(group_quo)
+
+  if (has_group) {
+    group_name <- get_column_name(group_quo, ".group")
+    validate_column_exists(.data, group_name, ".group")
+    group_var <- .data[[group_name]]
+
+    # Check if continuous (numeric and more than 10 unique non-missing values); otherwise the column is used as-is
+    is_continuous <- is.numeric(group_var) &&
+      length(unique(stats::na.omit(group_var))) > 10
+
+    if (is_continuous) {
+      # Create quantile groups; custom labels must supply exactly one label per tile
+      if (!is.null(tile_labels) && length(tile_labels) != n_tiles) {
+        abort(
+          "Length of {.arg tile_labels} must equal {.arg n_tiles}",
+          error_class = "halfmoon_length_error"
+        )
+      }
+
+      # Create tile groups: dplyr::ntile() assigns each row an integer bucket in 1..n_tiles
+      .data$.ess_group <- dplyr::ntile(group_var, n_tiles)
+
+      # Apply labels, defaulting to "Q1", "Q2", ... when none were supplied
+      if (is.null(tile_labels)) {
+        tile_labels <- paste0("Q", seq_len(n_tiles))
+      }
+      .data$.ess_group <- factor(
+        .data$.ess_group,
+        levels = seq_len(n_tiles),
+        labels = tile_labels
+      )
+      group_col <- ".ess_group"
+    } else {
+      group_col <- group_name
+    }
+  }
+
+  # Handle weights: .wts is a tidyselect expression resolved against .data
+  wts_quo <- rlang::enquo(.wts)
+
+  if (rlang::quo_is_null(wts_quo)) {
+    # No weights provided, just use observed (added below because wts_names is empty)
+    wts_names <- character()
+  } else {
+    wts_cols <- tidyselect::eval_select(wts_quo, .data)
+    wts_names <- names(wts_cols)
+
+    # Convert psw weight columns to numeric
+    for (wts_name in wts_names) {
+      .data[[wts_name]] <- extract_weight_data(.data[[wts_name]])
+    }
+  }
+
+  # Add observed if requested, or when no weights were selected: a constant weight of 1 per row
+  if (include_observed || length(wts_names) == 0) {
+    .data$.observed <- 1
+    wts_names <- c(".observed", wts_names)
+  }
+
+  # Reshape to long format: one row per observation per weighting method
+  plot_data <- tidyr::pivot_longer(
+    .data,
+    cols = dplyr::all_of(wts_names),
+    names_to = "method",
+    values_to = "weight"
+  )
+
+  # Clean up method names: report the internal ".observed" column as "observed"
+  plot_data$method <- ifelse(
+    plot_data$method == ".observed",
+    "observed",
+    plot_data$method
+  )
+
+  # Calculate ESS; n counts every row (including rows whose weight is NA) while ess() drops NA weights
+  if (has_group) {
+    # Group-wise ESS: one row per method and group level; the grouping column is renamed to "group"
+    ess_data <- plot_data |>
+      dplyr::group_by(method, .data[[group_col]]) |>
+      dplyr::summarise(
+        n = dplyr::n(),
+        ess = ess(weight, na.rm = TRUE),
+        ess_pct = ess / n * 100,
+        .groups = "drop"
+      ) |>
+      dplyr::rename(group = !!group_col)
+  } else {
+    # Overall ESS: one row per method
+    ess_data <- plot_data |>
+      dplyr::group_by(method) |>
+      dplyr::summarise(
+        n = dplyr::n(),
+        ess = ess(weight, na.rm = TRUE),
+        ess_pct = ess / n * 100,
+        .groups = "drop"
+      )
+  }
+
+  # Clean up temporary columns. NOTE(review): the grouped branch already renames the grouping column to "group", so ".ess_group" looks unreachable here - confirm
+  if (has_group && is_continuous && ".ess_group" %in% names(ess_data)) {
+    ess_data <- dplyr::select(ess_data, -.ess_group)
+  }
+
+  ess_data
+}
@@ -370,12 +370,14 @@ bal_ks <- function(
     p_ref <- if (is.null(weights)) {
       mean(covariate[idx_ref])
     } else {
-      sum(extract_weight_data(weights)[idx_ref] * covariate[idx_ref]) / sum(extract_weight_data(weights)[idx_ref])
+      sum(extract_weight_data(weights)[idx_ref] * covariate[idx_ref]) /
+        sum(extract_weight_data(weights)[idx_ref])
     }
     p_other <- if (is.null(weights)) {
       mean(covariate[idx_other])
     } else {
-      sum(extract_weight_data(weights)[idx_other] * covariate[idx_other]) / sum(extract_weight_data(weights)[idx_other])
+      sum(extract_weight_data(weights)[idx_other] * covariate[idx_other]) /
+        sum(extract_weight_data(weights)[idx_other])
     }
     return(abs(p_other - p_ref))
   }
@@ -384,7 +386,8 @@ bal_ks <- function(
   # Extract and weight
   x_ref <- covariate[idx_ref]
   x_other <- covariate[idx_other]
-  w_ref <- if (is.null(weights)) rep(1, length(x_ref)) else extract_weight_data(weights)[idx_ref]
+  w_ref <- if (is.null(weights)) rep(1, length(x_ref)) else
+    extract_weight_data(weights)[idx_ref]
   w_other <- if (is.null(weights)) rep(1, length(x_other)) else
     extract_weight_data(weights)[idx_other]
   w_ref <- w_ref / sum(w_ref)
 
@@ -259,7 +259,7 @@ compute_method_quantiles <- function(
 weighted_quantile <- function(values, quantiles, .wts) {
   # Extract numeric data from weights (handles both numeric and psw objects)
   .wts <- extract_weight_data(.wts)
-  
+
   # Remove NA values if present
   na_idx <- is.na(values) | is.na(.wts)
   if (any(na_idx)) {
 
@@ -6,6 +6,7 @@
 #'
 #' @param wts A numeric vector of weights (e.g., from survey or
 #'   inverse-probability weighting).
+#' @param na.rm Logical. Should missing values be removed? Default is FALSE.
 #'
 #' @return A single numeric value representing the effective sample size.
 #'
@@ -42,8 +43,8 @@
 #' ess(wts2)
 #'
 #' @export
-ess <- function(wts) {
+ess <- function(wts, na.rm = FALSE) {
   # Extract numeric data from psw weights if present
   wts <- extract_weight_data(wts)
-  sum(wts)^2 / sum(wts^2)
+  sum(wts, na.rm = na.rm)^2 / sum(wts^2, na.rm = na.rm)
 }
@@ -152,7 +152,13 @@ check_treatment_level <- function(group_var, treatment_level) {
   create_treatment_indicator(group_var, treatment_level)
 }
 
-check_columns <- function(data, fitted_name, group_name, treatment_level, call = rlang::caller_env()) {
+check_columns <- function(
+  data,
+  fitted_name,
+  group_name,
+  treatment_level,
+  call = rlang::caller_env()
+) {
   if (is.null(treatment_level)) {
     if (!fitted_name %in% names(data)) {
       abort(
@@ -591,7 +597,13 @@ compute_calibration_for_group <- function(
 
   # Compute calibration based on method
   calibration_result <- if (method == "breaks") {
-    compute_calibration_breaks_imp(df, bins, binning_method, conf_level, call = call)
+    compute_calibration_breaks_imp(
+      df,
+      bins,
+      binning_method,
+      conf_level,
+      call = call
+    )
   } else if (method == "logistic") {
     compute_calibration_logistic_imp(df, smooth, conf_level, k = k, call = call)
   } else if (method == "windowed") {
 
@@ -62,27 +62,29 @@ StatMirrorDensity <- ggplot2::ggproto(
         .n_groups = length(unique(group)),
         .groups = "drop"
       )
-    
+
     # Check for panels with more than 2 groups
     if (any(panel_groups$.n_groups > 2)) {
       abort(
         "Groups of three or greater not supported in `geom_mirror_density()`",
         error_class = "halfmoon_group_error"
       )
     }
-    
+
     # Join back to get panel group info for each row
     data <- dplyr::left_join(data, panel_groups, by = "PANEL")
-    
+
     # Mark which groups should be mirrored (first group in each panel)
-    data$.should_mirror <- purrr::map2_lgl(data$group, data$.panel_groups, 
+    data$.should_mirror <- purrr::map2_lgl(
+      data$group,
+      data$.panel_groups,
       ~ length(.y) == 2 && .x == .y[1]
     )
-    
+
     # Clean up temporary columns
     data$.panel_groups <- NULL
     data$.n_groups <- NULL
-    
+
     data
   },
   compute_group = function(
@@ -109,15 +111,15 @@ StatMirrorDensity <- ggplot2::ggproto(
         error_class = "halfmoon_aes_error"
       )
     }
-    
+
     # Store mirroring flag
     should_mirror <- unique(data$.should_mirror)
-    
+
     # Extract numeric data from psw weights if present
     if ("weight" %in% names(data)) {
       data$weight <- extract_weight_data(data$weight)
     }
-    
+
     data <- ggplot2::StatDensity$compute_group(
       data = data,
       scales = scales,
@@ -130,15 +132,15 @@ StatMirrorDensity <- ggplot2::ggproto(
       bounds = bounds,
       flipped_aes = flipped_aes
     )
-    
+
     # Apply mirroring if needed
     if (length(should_mirror) == 1 && should_mirror) {
       data$density <- -data$density
       data$count <- -data$count
       data$scaled <- -data$scaled
       data$ndensity <- -data$ndensity
     }
-    
+
     data
   }
 )
-Original file line number
+Diff line change
     propensity (>= 0.0.0.9000),
     purrr,
     rlang,
 +    scales,
     smd,
     tibble,
     tidyr,
Original file line number	Diff line number	Diff line change
`@@ -226,7 +226,7 @@ roc_curve <- function(`
`226`	`226`	`}`
`227`	`227`
`228`	`228`	`weights <- extract_weight_data(weights)`
`229`		`-`
	`229`	`+`
`230`	`230`	`# Handle zero and negative weights`
`231`	`231`	`if (any(weights <= 0, na.rm = TRUE)) {`
`232`	`232`	`n_zero_neg <- sum(weights <= 0, na.rm = TRUE)`