r-causal
diff --git a/R/bal_model_auc.R
Lines changed: 4 additions & 4 deletions b/R/bal_model_auc.R
Lines changed: 4 additions & 4 deletions
diff --git a/R/bal_model_roc_curve.R
Lines changed: 4 additions & 4 deletions b/R/bal_model_roc_curve.R
Lines changed: 4 additions & 4 deletions
diff --git a/R/check_balance.R
Lines changed: 71 additions & 17 deletions b/R/check_balance.R
Lines changed: 71 additions & 17 deletions
diff --git a/R/check_ess.R
Lines changed: 16 additions & 16 deletions b/R/check_ess.R
Lines changed: 16 additions & 16 deletions
@@ -16,7 +16,7 @@
 #'
 #' @param .data A data frame containing the variables.
 #' @param .exposure The treatment/outcome variable (unquoted).
-#' @param .estimate The propensity score or fitted values (unquoted).
+#' @param .fitted The propensity score or fitted values (unquoted).
 #' @param .weights Optional single weight variable (unquoted). If NULL, computes
 #'   unweighted AUC.
 #' @inheritParams balance_params
@@ -40,15 +40,15 @@
 bal_model_auc <- function(
   .data,
   .exposure,
-  .estimate,
+  .fitted,
   .weights = NULL,
   na.rm = TRUE,
   .focal_level = NULL
 ) {
   validate_data_frame(.data, call = rlang::caller_env())
 
   exposure_quo <- rlang::enquo(.exposure)
-  estimate_quo <- rlang::enquo(.estimate)
+  estimate_quo <- rlang::enquo(.fitted)
   wts_quo <- rlang::enquo(.weights)
 
   # Extract column names
@@ -64,7 +64,7 @@ bal_model_auc <- function(
   }
   if (length(estimate_name) != 1) {
     abort(
-      "{.arg .estimate} must select exactly one variable",
+      "{.arg .fitted} must select exactly one variable",
       error_class = "halfmoon_arg_error",
       call = rlang::current_env()
     )
 
@@ -17,7 +17,7 @@
 #'
 #' @param .data A data frame containing the variables.
 #' @param .exposure The treatment/outcome variable (unquoted).
-#' @param .estimate The propensity score or fitted values (unquoted).
+#' @param .fitted The propensity score or fitted values (unquoted).
 #' @param .weights Optional single weight variable (unquoted). If NULL, computes
 #'   unweighted ROC curve.
 #' @inheritParams balance_params
@@ -43,15 +43,15 @@
 bal_model_roc_curve <- function(
   .data,
   .exposure,
-  .estimate,
+  .fitted,
   .weights = NULL,
   na.rm = TRUE,
   .focal_level = NULL
 ) {
   validate_data_frame(.data, call = rlang::caller_env())
 
   exposure_quo <- rlang::enquo(.exposure)
-  estimate_quo <- rlang::enquo(.estimate)
+  estimate_quo <- rlang::enquo(.fitted)
   wts_quo <- rlang::enquo(.weights)
 
   # Extract column names
@@ -67,7 +67,7 @@ bal_model_roc_curve <- function(
   }
   if (length(estimate_name) != 1) {
     abort(
-      "{.arg .estimate} must select exactly one variable",
+      "{.arg .fitted} must select exactly one variable",
       error_class = "halfmoon_arg_error",
       call = rlang::current_env()
     )
 
@@ -122,24 +122,40 @@ check_balance <- function(
     # Extract just the variables we're working with
     vars_data <- dplyr::select(.data, dplyr::all_of(var_names))
 
+    # Track variable origins for interaction filtering
+    dummy_var_mapping <- list()
+
     # Create dummy variables if requested
     if (make_dummy_vars) {
-      vars_data <- create_dummy_variables(vars_data, binary_as_single = TRUE)
+      dummy_result <- create_dummy_variables(
+        vars_data,
+        binary_as_single = TRUE,
+        return_mapping = TRUE
+      )
+      vars_data <- dummy_result$data
+      dummy_var_mapping <- dummy_result$mapping
     }
 
     # Add squared terms if requested
     if (squares) {
       numeric_vars <- purrr::map_lgl(vars_data, is.numeric)
       if (any(numeric_vars)) {
         numeric_data <- dplyr::select(vars_data, dplyr::where(is.numeric))
-        squared_data <- dplyr::mutate(
+        # Only square non-binary numeric variables
+        non_binary_numeric <- dplyr::select(
           numeric_data,
-          dplyr::across(everything(), \(x) x^2, .names = "{.col}_squared")
-        )
-        vars_data <- dplyr::bind_cols(
-          vars_data,
-          dplyr::select(squared_data, dplyr::ends_with("_squared"))
+          dplyr::where(\(x) !is_binary(x))
         )
+        if (ncol(non_binary_numeric) > 0) {
+          squared_data <- dplyr::mutate(
+            non_binary_numeric,
+            dplyr::across(everything(), \(x) x^2, .names = "{.col}_squared")
+          )
+          vars_data <- dplyr::bind_cols(
+            vars_data,
+            dplyr::select(squared_data, dplyr::ends_with("_squared"))
+          )
+        }
       }
     }
 
@@ -148,14 +164,19 @@ check_balance <- function(
       numeric_vars <- purrr::map_lgl(vars_data, is.numeric)
       if (any(numeric_vars)) {
         numeric_data <- dplyr::select(vars_data, dplyr::where(is.numeric))
-        # Only cube original variables, not squared ones
+        # Only cube original non-binary variables, not squared ones
         original_numeric <- dplyr::select(
           numeric_data,
           -dplyr::ends_with("_squared")
         )
-        if (ncol(original_numeric) > 0) {
+        # Filter out binary variables
+        non_binary_original <- dplyr::select(
+          original_numeric,
+          dplyr::where(\(x) !is_binary(x))
+        )
+        if (ncol(non_binary_original) > 0) {
           cubed_data <- dplyr::mutate(
-            original_numeric,
+            non_binary_original,
             dplyr::across(everything(), \(x) x^3, .names = "{.col}_cubed")
           )
           vars_data <- dplyr::bind_cols(
@@ -206,13 +227,32 @@ check_balance <- function(
           }
 
           # Prepare variables for interactions
-          interaction_vars <- purrr::imap(
+          interaction_vars_list <- purrr::imap(
             original_numeric,
             prepare_interaction_variable,
             binary_categorical_names = binary_categorical_names,
             original_vars_data = original_vars_data
-          ) |>
-            purrr::flatten()
+          )
+
+          # Extract the variables and update mapping for expanded binaries
+          interaction_vars <- purrr::flatten(interaction_vars_list)
+
+          # Update mapping for any expanded binary categoricals
+          for (i in seq_along(interaction_vars_list)) {
+            var_result <- interaction_vars_list[[i]]
+            var_name <- names(original_numeric)[i]
+
+            # Check if this variable was expanded (binary categorical)
+            if (var_name %in% binary_categorical_names) {
+              # The result is already flattened by prepare_interaction_variable
+              # Get the names of the expanded dummies
+              expanded_names <- names(var_result)
+              for (expanded_name in expanded_names) {
+                # Track that this expanded dummy came from the original variable
+                dummy_var_mapping[[expanded_name]] <- var_name
+              }
+            }
+          }
 
           # Now create interactions between all pairs
           var_combinations <- utils::combn(
@@ -224,7 +264,7 @@ check_balance <- function(
           # Filter out same-variable dummy interactions (e.g., sex0 x sex1)
           valid_combinations <- purrr::keep(
             var_combinations,
-            is_valid_interaction_combo
+            \(combo) is_valid_interaction_combo(combo, dummy_var_mapping)
           )
 
           # Create interaction terms using functional programming
@@ -234,9 +274,12 @@ check_balance <- function(
             interaction_vars = interaction_vars
           )
 
-          # Flatten the list and add to vars_data
+          # Flatten the list and convert to data frame
           interaction_terms <- purrr::flatten(interaction_terms)
-          vars_data <- c(vars_data, interaction_terms)
+          if (length(interaction_terms) > 0) {
+            interaction_df <- dplyr::as_tibble(interaction_terms)
+            vars_data <- dplyr::bind_cols(vars_data, interaction_df)
+          }
         }
       }
     }
@@ -568,10 +611,21 @@ prepare_interaction_variable <- function(
 }
 
 # Check if an interaction combination is valid (not between same variable dummies)
-is_valid_interaction_combo <- function(combo) {
+is_valid_interaction_combo <- function(combo, variable_mapping = NULL) {
   var1 <- combo[1]
   var2 <- combo[2]
 
+  # If we have a mapping, use it to determine if variables come from same source
+  if (!is.null(variable_mapping)) {
+    # Get the original variable for each dummy (or the variable itself if not a dummy)
+    origin1 <- variable_mapping[[var1]] %||% var1
+    origin2 <- variable_mapping[[var2]] %||% var2
+
+    # Only keep interactions between different original variables
+    return(origin1 != origin2)
+  }
+
+  # Fallback to the old regex approach if no mapping provided
   # Extract base variable names (before dummy suffixes)
   base1 <- sub("^([^0-9]+).*", "\\1", var1)
   base2 <- sub("^([^0-9]+).*", "\\1", var2)
 
@@ -12,7 +12,7 @@
 #' number of observations, indicating that a few observations carry
 #' disproportionately large weights.
 #'
-#' When `.group` is provided, ESS is calculated separately for each group level:
+#' When `.exposure` is provided, ESS is calculated separately for each exposure level:
 #' - For binary/categorical exposures: ESS is computed within each treatment level
 #' - For continuous exposures: The variable is divided into quantiles (using
 #'   `dplyr::ntile()`) and ESS is computed within each quantile
@@ -21,17 +21,17 @@
 #' further analysis.
 #'
 #' @inheritParams check_params
-#' @param .group Optional grouping variable. When provided, ESS is calculated
-#'   separately for each group level. For continuous variables, groups are
+#' @param .exposure Optional exposure variable. When provided, ESS is calculated
+#'   separately for each exposure level. For continuous variables, groups are
 #'   created using quantiles.
-#' @param n_tiles For continuous `.group` variables, the number of quantile
+#' @param n_tiles For continuous `.exposure` variables, the number of quantile
 #'   groups to create. Default is 4 (quartiles).
 #' @param tile_labels Optional character vector of labels for the quantile groups
-#'   when `.group` is continuous. If NULL, uses "Q1", "Q2", etc.
+#'   when `.exposure` is continuous. If NULL, uses "Q1", "Q2", etc.
 #'
 #' @return A tibble with columns:
 #'   \item{method}{Character. The weighting method ("observed" or weight variable name).}
-#'   \item{group}{Character. The group level (if `.group` is provided).}
+#'   \item{group}{Character. The exposure level (if `.exposure` is provided).}
 #'   \item{n}{Integer. The number of observations in the group.}
 #'   \item{ess}{Numeric. The effective sample size.}
 #'   \item{ess_pct}{Numeric. ESS as a percentage of the actual sample size.}
@@ -44,41 +44,41 @@
 #' check_ess(nhefs_weights, .weights = c(w_ate, w_att, w_atm))
 #'
 #' # ESS by treatment group (binary exposure)
-#' check_ess(nhefs_weights, .weights = c(w_ate, w_att), .group = qsmk)
+#' check_ess(nhefs_weights, .weights = c(w_ate, w_att), .exposure = qsmk)
 #'
 #' # ESS by treatment group (categorical exposure)
-#' check_ess(nhefs_weights, .weights = w_cat_ate, .group = alcoholfreq_cat)
+#' check_ess(nhefs_weights, .weights = w_cat_ate, .exposure = alcoholfreq_cat)
 #'
 #' # ESS by quartiles of a continuous variable
-#' check_ess(nhefs_weights, .weights = w_ate, .group = age, n_tiles = 4)
+#' check_ess(nhefs_weights, .weights = w_ate, .exposure = age, n_tiles = 4)
 #'
 #' # Custom labels for continuous groups
-#' check_ess(nhefs_weights, .weights = w_ate, .group = age,
+#' check_ess(nhefs_weights, .weights = w_ate, .exposure = age,
 #'           n_tiles = 3, tile_labels = c("Young", "Middle", "Older"))
 #'
 #' # Without unweighted comparison
-#' check_ess(nhefs_weights, .weights = w_ate, .group = qsmk,
+#' check_ess(nhefs_weights, .weights = w_ate, .exposure = qsmk,
 #'           include_observed = FALSE)
 #'
 #' @export
 check_ess <- function(
   .data,
   .weights = NULL,
-  .group = NULL,
+  .exposure = NULL,
   include_observed = TRUE,
   n_tiles = 4,
   tile_labels = NULL
 ) {
   # Validate inputs
   validate_data_frame(.data)
 
-  # Handle group variable
-  group_quo <- rlang::enquo(.group)
+  # Handle exposure variable
+  group_quo <- rlang::enquo(.exposure)
   has_group <- !rlang::quo_is_null(group_quo)
 
   if (has_group) {
-    group_name <- get_column_name(group_quo, ".group")
-    validate_column_exists(.data, group_name, ".group")
+    group_name <- get_column_name(group_quo, ".exposure")
+    validate_column_exists(.data, group_name, ".exposure")
     group_var <- .data[[group_name]]
 
     # Check if continuous (numeric and more than 10 unique values)