Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
covariate, or outcome columns will be placed into ``ignore_columns`` during
processing but are still retained and available in the output.

## Bug Fixes

- **Weight diagnostics now consistently accept DataFrame inputs**
- `design_effect`, `nonparametric_skew`, `prop_above_and_below`, and
`weighted_median_breakdown_point` now explicitly normalize DataFrame inputs
to their first column before computation, matching validation behavior and
returning scalar/Series outputs consistently.

# 0.16.0 (2026-02-09)

Expand Down
118 changes: 88 additions & 30 deletions balance/stats_and_plots/weights_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,60 @@
##########################################


def _weights_to_series(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> pd.Series:
"""Normalize supported weight inputs to a pandas Series.

If ``w`` is a DataFrame, only the first column is used (the historical
behavior in this module).

Args:
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Candidate weight container to normalize.

Returns:
pd.Series: Normalized weight values.

Raises:
TypeError: If ``w`` is a DataFrame with zero columns.
"""
if isinstance(w, pd.DataFrame):
if w.shape[1] == 0:
raise TypeError("weights (w) DataFrame must include at least one column.")
return w.iloc[:, 0]
if isinstance(w, pd.Series):
return w
return pd.Series(w)


def _check_weights_series_are_valid(
w: pd.Series,
*,
require_positive: bool = False,
) -> None:
"""Validate a normalized weight Series.

Args:
w (pd.Series): Weights represented as a pandas Series.
require_positive (bool, optional): If True, require at least one weight
to be strictly positive. Defaults to False.

Raises:
TypeError: If ``w`` is not numeric.
ValueError: If ``w`` includes any negative value.
ValueError: If ``require_positive`` is True and all weights are zero.
"""
if not pd.api.types.is_numeric_dtype(w):
raise TypeError(
f"weights (w) must be a number but instead they are of type: {w.dtype}."
)
if any(w < 0):
raise ValueError("weights (w) must all be non-negative values.")
if require_positive and not any(w > 0):
raise ValueError("weights (w) must include at least one positive value.")


def _check_weights_are_valid(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame | None,
*,
Expand All @@ -39,7 +93,7 @@ def _check_weights_are_valid(
to be strictly positive. Defaults to False.

Raises:
ValueError: if weights are not numeric.
TypeError: if weights are not numeric, or if ``w`` is an empty DataFrame.
ValueError: if weights include a negative value.
ValueError: if ``require_positive`` is True and all weights are zero.

Expand All @@ -48,27 +102,15 @@ def _check_weights_are_valid(
"""
if w is None:
return None
if isinstance(w, pd.DataFrame):
w = w.iloc[:, 0] # if DataFrame, we check only the first column.
if not isinstance(w, pd.Series):
w = pd.Series(w)
# TODO: (p2) consider having a check for each type of w, instead of
# turning w into pd.Series (since this solution might not be very efficient)
if not pd.api.types.is_numeric_dtype(w):
raise TypeError(
f"weights (w) must be a number but instead they are of type: {w.dtype}."
)
if any(w < 0):
raise ValueError("weights (w) must all be non-negative values.")
if require_positive and not any(w > 0):
raise ValueError("weights (w) must include at least one positive value.")
w_series = _weights_to_series(w)
_check_weights_series_are_valid(w_series, require_positive=require_positive)

return None


# TODO: if the input is pd.DataFrame than the output will be pd.Series.
# we could make the support of this more official in the future.
def design_effect(w: pd.Series) -> np.float64:
def design_effect(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> np.float64:
"""
Kish's design effect measure.

Expand All @@ -86,7 +128,9 @@ def design_effect(w: pd.Series) -> np.float64:
ISSN 2470-6345. https://en.wikipedia.org/wiki/Design_effect

Args:
w (pd.Series): A pandas series of weights (non negative, float/int) values.
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.

Returns:
np.float64: An estimator saying by how much the variance of the mean is expected to increase, compared to a random sample mean,
Expand All @@ -105,14 +149,17 @@ def design_effect(w: pd.Series) -> np.float64:
# 2.9880418803112336
# As expected. With a single dominating weight - the Deff is almost equal to the sample size.
"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)
from balance.util import _safe_divide_with_zero_handling

# Avoid divide by zero warning
return _safe_divide_with_zero_handling((w**2).mean(), w.mean() ** 2)


def nonparametric_skew(w: pd.Series) -> float:
def nonparametric_skew(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> float:
# TODO (p2): consider adding other skew measures (https://en.wikipedia.org/wiki/Skewness)
# look more in the literature (are there references for using this vs another, or none at all?)
# update the doc with insights, once done:
Expand All @@ -125,7 +172,9 @@ def nonparametric_skew(w: pd.Series) -> float:
- https://en.wikipedia.org/wiki/Nonparametric_skew

Args:
w (pd.Series): A pandas series of weights (non negative, float/int) values.
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.

Returns:
np.float64: A value of skew, between -1 to 1, but for weights it's often positive (i.e.: right tailed distribution).
Expand All @@ -143,14 +192,15 @@ def nonparametric_skew(w: pd.Series) -> float:
nonparametric_skew(pd.Series((-1,1,1, 1))) #-0.5

"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)
if (len(w) == 1) or (w.std() == 0):
return float(0)
return (w.mean() - w.median()) / w.std()


def prop_above_and_below(
w: pd.Series,
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
below: tuple[float, ...] | list[float] | None = (1 / 10, 1 / 5, 1 / 3, 1 / 2, 1),
above: tuple[float, ...] | list[float] | None = (1, 2, 3, 5, 10),
return_as_series: bool = True,
Expand All @@ -169,7 +219,9 @@ def prop_above_and_below(
Note that below and above can overlap, be unordered, etc. The user is responsible for the order.

Args:
w (pd.Series): A pandas series of weights (float, non negative) values.
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.
below (tuple[float, ...] | list[float] | None, optional):
values to check which proportion of normalized weights are *below* them.
Using None returns None.
Expand Down Expand Up @@ -238,7 +290,8 @@ def prop_above_and_below(
# dtype: float64}

"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)

# normalize weight to sample size:
w = w / w.mean()
Expand Down Expand Up @@ -276,7 +329,9 @@ def prop_above_and_below(
return out # pyre-ignore[7]: TODO: see if we can fix this pyre


def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
def weighted_median_breakdown_point(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> np.float64:
# TODO (p2): do we want to have weighted_quantile_breakdown_point
# so to check for quantiles other than 50%?
"""
Expand All @@ -288,7 +343,9 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
- https://en.wikipedia.org/wiki/Robust_statistics#Breakdown_point

Args:
w (pd.Series): A pandas series of weights (float, non negative values).
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.

Returns:
np.float64: A minimal percent of users that contain at least 50% of the weights.
Expand All @@ -308,7 +365,8 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
w = pd.Series([1,1,1,1, 10])
print(weighted_median_breakdown_point(w)) # 0.2
"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)

# normalize weight to sample size:

Expand All @@ -323,4 +381,4 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
)
# find minimal proportion of samples needed to reach 50%
# the +1 trick is to deal with cases that 1 user has a weight that is larger than 50%.
return numerator / n # breakdown_point
return np.float64(numerator / n) # breakdown_point
140 changes: 140 additions & 0 deletions tests/test_stats_and_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,23 @@ def test__check_weights_are_valid(self) -> None:
):
_check_weights_are_valid([np.nan, np.nan], require_positive=True)

# DataFrame with no columns should fail fast with a clear error
with self.assertRaisesRegex(
TypeError, "weights \\(w\\) DataFrame must include at least one column."
):
_check_weights_are_valid(pd.DataFrame(index=[0, 1]))

# Validation should always use first DataFrame column, even if later columns are valid
with self.assertRaisesRegex(TypeError, "weights \\(w\\) must be a number*"):
_check_weights_are_valid(
pd.DataFrame(
{
"bad_first": ["a", "b", "c"],
"good_second": [1.0, 2.0, 3.0],
}
)
)

def test_design_effect(self) -> None:
"""Test calculation of design effect for weighted samples.

Expand Down Expand Up @@ -200,6 +217,117 @@ def test_prop_above_and_below(self) -> None:
}
self.assertEqual({k: v.to_list() for k, v in result_dict.items()}, expected)

def test_weights_diagnostics_accept_list_and_ndarray_input(self) -> None:
    """Diagnostics should give identical results for list/ndarray/Series."""
    from balance.stats_and_plots.weights_stats import (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    raw_weights = [1.0, 2.0, 3.0, 4.0]
    as_series = pd.Series(raw_weights)

    # Scalar-valued diagnostics must match the Series result exactly for
    # both plain-list and ndarray inputs.
    for alternative in (raw_weights, np.array(raw_weights)):
        for diagnostic in (
            design_effect,
            nonparametric_skew,
            weighted_median_breakdown_point,
        ):
            self.assertEqual(diagnostic(alternative), diagnostic(as_series))

    # prop_above_and_below returns a Series; compare element-wise.
    baseline = prop_above_and_below(as_series)
    self.assertIsNotNone(baseline)
    for alternative in (raw_weights, np.array(raw_weights)):
        candidate = prop_above_and_below(alternative)
        self.assertIsNotNone(candidate)
        pd.testing.assert_series_equal(
            _assert_type(candidate, pd.Series),
            _assert_type(baseline, pd.Series),
        )

def test_weights_diagnostics_dataframe_first_column_errors(self) -> None:
    """Diagnostics should validate only the first DataFrame column."""
    from balance.stats_and_plots.weights_stats import (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    diagnostics = (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    # A non-numeric first column must fail even when a later column is valid.
    non_numeric_first = pd.DataFrame(
        {
            "bad_first": ["a", "b", "c"],
            "good_second": [1.0, 2.0, 3.0],
        }
    )
    for diagnostic in diagnostics:
        with self.assertRaisesRegex(TypeError, "weights \\(w\\) must be a number*"):
            diagnostic(non_numeric_first)

    # A DataFrame with zero columns should fail fast with a clear message.
    no_columns = pd.DataFrame(index=[0, 1])
    for diagnostic in diagnostics:
        with self.assertRaisesRegex(
            TypeError,
            "weights \\(w\\) DataFrame must include at least one column.",
        ):
            diagnostic(no_columns)

def test_weights_diagnostics_accept_dataframe_input(self) -> None:
    """DataFrame input should behave as if only its first column was passed."""
    from balance.stats_and_plots.weights_stats import (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    weights = pd.Series((1.0, 2.0, 3.0, 4.0), name="weights")
    frame = pd.DataFrame(
        {
            "weights": weights,
            # Diagnostics are expected to ignore every column after the first.
            "other": (100.0, 100.0, 100.0, 100.0),
        }
    )

    for diagnostic in (
        design_effect,
        nonparametric_skew,
        weighted_median_breakdown_point,
    ):
        self.assertEqual(diagnostic(frame), diagnostic(weights))

    frame_result = prop_above_and_below(frame)
    series_result = prop_above_and_below(weights)
    self.assertIsNotNone(frame_result)
    self.assertIsNotNone(series_result)
    pd.testing.assert_series_equal(
        _assert_type(frame_result, pd.Series),
        _assert_type(series_result, pd.Series),
    )


class TestImpactOfWeightsOnOutcome(
balance.testutil.BalanceTestCase,
Expand Down Expand Up @@ -927,6 +1055,18 @@ def test_weighted_median_breakdown_point(self) -> None:
weighted_median_breakdown_point(pd.Series((1, 1, 1, 1, 10))), 0.2
)

def test_weighted_median_breakdown_point_returns_np_float64(self) -> None:
    """The breakdown point should be np.float64 for every input pattern."""
    from balance.stats_and_plots.weights_stats import (
        weighted_median_breakdown_point,
    )

    # Cover both a spread-out case and one dominated by a majority weight.
    for weights in (pd.Series([1, 1, 1, 10]), pd.Series([60, 20, 20])):
        self.assertIsInstance(weighted_median_breakdown_point(weights), np.float64)

def test_design_effect_with_two_values(self) -> None:
"""Test design effect with two weight values.

Expand Down
Loading