Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
covariate, or outcome columns will be placed into ``ignore_columns`` during
processing but are still retained and available in the output.

## Bug Fixes

- **Weight diagnostics now consistently accept DataFrame inputs**
- `design_effect`, `nonparametric_skew`, `prop_above_and_below`, and
`weighted_median_breakdown_point` now explicitly normalize DataFrame inputs
to their first column before computation, matching validation behavior and
returning scalar/Series outputs consistently.

# 0.16.0 (2026-02-09)

Expand Down
118 changes: 88 additions & 30 deletions balance/stats_and_plots/weights_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,60 @@
##########################################


def _weights_to_series(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> pd.Series:
"""Normalize supported weight inputs to a pandas Series.

If ``w`` is a DataFrame, only the first column is used (the historical
behavior in this module).

Args:
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Candidate weight container to normalize.

Returns:
pd.Series: Normalized weight values.

Raises:
TypeError: If ``w`` is a DataFrame with zero columns.
"""
if isinstance(w, pd.DataFrame):
if w.shape[1] == 0:
raise TypeError("weights (w) DataFrame must include at least one column.")
return w.iloc[:, 0]
if isinstance(w, pd.Series):
return w
return pd.Series(w)


def _check_weights_series_are_valid(
w: pd.Series,
*,
require_positive: bool = False,
) -> None:
"""Validate a normalized weight Series.

Args:
w (pd.Series): Weights represented as a pandas Series.
require_positive (bool, optional): If True, require at least one weight
to be strictly positive. Defaults to False.

Raises:
TypeError: If ``w`` is not numeric.
ValueError: If ``w`` includes any negative value.
ValueError: If ``require_positive`` is True and all weights are zero.
"""
if not pd.api.types.is_numeric_dtype(w):
raise TypeError(
f"weights (w) must be a number but instead they are of type: {w.dtype}."
)
if any(w < 0):
raise ValueError("weights (w) must all be non-negative values.")
if require_positive and not any(w > 0):
raise ValueError("weights (w) must include at least one positive value.")


def _check_weights_are_valid(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame | None,
*,
Expand All @@ -39,7 +93,7 @@ def _check_weights_are_valid(
to be strictly positive. Defaults to False.

Raises:
ValueError: if weights are not numeric.
TypeError: if weights are not numeric, or if ``w`` is an empty DataFrame.
ValueError: if weights include a negative value.
ValueError: if ``require_positive`` is True and all weights are zero.

Expand All @@ -48,27 +102,15 @@ def _check_weights_are_valid(
"""
if w is None:
return None
if isinstance(w, pd.DataFrame):
w = w.iloc[:, 0] # if DataFrame, we check only the first column.
if not isinstance(w, pd.Series):
w = pd.Series(w)
# TODO: (p2) consider having a check for each type of w, instead of
# turning w into pd.Series (since this solution might not be very efficient)
if not pd.api.types.is_numeric_dtype(w):
raise TypeError(
f"weights (w) must be a number but instead they are of type: {w.dtype}."
)
if any(w < 0):
raise ValueError("weights (w) must all be non-negative values.")
if require_positive and not any(w > 0):
raise ValueError("weights (w) must include at least one positive value.")
w_series = _weights_to_series(w)
_check_weights_series_are_valid(w_series, require_positive=require_positive)

return None


# TODO: if the input is pd.DataFrame than the output will be pd.Series.
# we could make the support of this more official in the future.
def design_effect(w: pd.Series) -> np.float64:
def design_effect(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> np.float64:
"""
Kish's design effect measure.

Expand All @@ -86,7 +128,9 @@ def design_effect(w: pd.Series) -> np.float64:
ISSN 2470-6345. https://en.wikipedia.org/wiki/Design_effect

Args:
w (pd.Series): A pandas series of weights (non negative, float/int) values.
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.

Returns:
np.float64: An estimator saying by how much the variance of the mean is expected to increase, compared to a random sample mean,
Expand All @@ -105,14 +149,17 @@ def design_effect(w: pd.Series) -> np.float64:
# 2.9880418803112336
# As expected. With a single dominating weight - the Deff is almost equal to the sample size.
"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)
from balance.util import _safe_divide_with_zero_handling

# Avoid divide by zero warning
return _safe_divide_with_zero_handling((w**2).mean(), w.mean() ** 2)


def nonparametric_skew(w: pd.Series) -> float:
def nonparametric_skew(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> float:
# TODO (p2): consider adding other skew measures (https://en.wikipedia.org/wiki/Skewness)
# look more in the literature (are there references for using this vs another, or none at all?)
# update the doc with insights, once done:
Expand All @@ -125,7 +172,9 @@ def nonparametric_skew(w: pd.Series) -> float:
- https://en.wikipedia.org/wiki/Nonparametric_skew

Args:
w (pd.Series): A pandas series of weights (non negative, float/int) values.
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.

Returns:
np.float64: A value of skew, between -1 to 1, but for weights it's often positive (i.e.: right tailed distribution).
Expand All @@ -143,14 +192,15 @@ def nonparametric_skew(w: pd.Series) -> float:
nonparametric_skew(pd.Series((-1,1,1, 1))) #-0.5

"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)
if (len(w) == 1) or (w.std() == 0):
return float(0)
return (w.mean() - w.median()) / w.std()


def prop_above_and_below(
w: pd.Series,
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
below: tuple[float, ...] | list[float] | None = (1 / 10, 1 / 5, 1 / 3, 1 / 2, 1),
above: tuple[float, ...] | list[float] | None = (1, 2, 3, 5, 10),
return_as_series: bool = True,
Expand All @@ -169,7 +219,9 @@ def prop_above_and_below(
Note that below and above can overlap, be unordered, etc. The user is responsible for the order.

Args:
w (pd.Series): A pandas series of weights (float, non negative) values.
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.
below (tuple[float, ...] | list[float] | None, optional):
values to check which proportion of normalized weights are *below* them.
Using None returns None.
Expand Down Expand Up @@ -238,7 +290,8 @@ def prop_above_and_below(
# dtype: float64}

"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)

# normalize weight to sample size:
w = w / w.mean()
Expand Down Expand Up @@ -276,7 +329,9 @@ def prop_above_and_below(
return out # pyre-ignore[7]: TODO: see if we can fix this pyre


def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
def weighted_median_breakdown_point(
w: list[Any] | pd.Series | npt.NDArray | pd.DataFrame,
) -> np.float64:
# TODO (p2): do we want to have weighted_quantile_breakdown_point
# so to check for quantiles other than 50%?
"""
Expand All @@ -288,7 +343,9 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
- https://en.wikipedia.org/wiki/Robust_statistics#Breakdown_point

Args:
w (pd.Series): A pandas series of weights (float, non negative values).
w (list[Any] | pd.Series | npt.NDArray | pd.DataFrame):
Weights container with non-negative numeric values. If ``w`` is a
DataFrame, only the first column is used.

Returns:
np.float64: A minimal percent of users that contain at least 50% of the weights.
Expand All @@ -308,7 +365,8 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
w = pd.Series([1,1,1,1, 10])
print(weighted_median_breakdown_point(w)) # 0.2
"""
_check_weights_are_valid(w, require_positive=True)
w = _weights_to_series(w)
_check_weights_series_are_valid(w, require_positive=True)

# normalize weight to sample size:

Expand All @@ -323,4 +381,4 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
)
# find minimal proportion of samples needed to reach 50%
# the +1 trick is to deal with cases that 1 user has a weight that is larger than 50%.
return numerator / n # breakdown_point
return np.float64(numerator / n) # breakdown_point
140 changes: 140 additions & 0 deletions tests/test_stats_and_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,23 @@ def test__check_weights_are_valid(self) -> None:
):
_check_weights_are_valid([np.nan, np.nan], require_positive=True)

# DataFrame with no columns should fail fast with a clear error
with self.assertRaisesRegex(
TypeError, "weights \\(w\\) DataFrame must include at least one column."
):
_check_weights_are_valid(pd.DataFrame(index=[0, 1]))

# Validation should always use first DataFrame column, even if later columns are valid
with self.assertRaisesRegex(TypeError, "weights \\(w\\) must be a number*"):
_check_weights_are_valid(
pd.DataFrame(
{
"bad_first": ["a", "b", "c"],
"good_second": [1.0, 2.0, 3.0],
}
)
)

def test_design_effect(self) -> None:
"""Test calculation of design effect for weighted samples.

Expand Down Expand Up @@ -200,6 +217,117 @@ def test_prop_above_and_below(self) -> None:
}
self.assertEqual({k: v.to_list() for k, v in result_dict.items()}, expected)

def test_weights_diagnostics_accept_list_and_ndarray_input(self) -> None:
    """Diagnostics should give identical results for list/ndarray/Series."""
    from balance.stats_and_plots.weights_stats import (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    raw_weights = [1.0, 2.0, 3.0, 4.0]
    as_series = pd.Series(raw_weights)

    # Scalar-valued diagnostics must match the Series result exactly for
    # both plain-list and ndarray inputs.
    for alternative in (raw_weights, np.array(raw_weights)):
        for diagnostic in (
            design_effect,
            nonparametric_skew,
            weighted_median_breakdown_point,
        ):
            self.assertEqual(diagnostic(alternative), diagnostic(as_series))

    # prop_above_and_below returns a Series; compare element-wise.
    baseline = prop_above_and_below(as_series)
    self.assertIsNotNone(baseline)
    for alternative in (raw_weights, np.array(raw_weights)):
        candidate = prop_above_and_below(alternative)
        self.assertIsNotNone(candidate)
        pd.testing.assert_series_equal(
            _assert_type(candidate, pd.Series),
            _assert_type(baseline, pd.Series),
        )

def test_weights_diagnostics_dataframe_first_column_errors(self) -> None:
    """Diagnostics should validate only the first DataFrame column."""
    from balance.stats_and_plots.weights_stats import (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    diagnostics = (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    # A non-numeric first column must fail even when a later column is valid.
    non_numeric_first = pd.DataFrame(
        {
            "bad_first": ["a", "b", "c"],
            "good_second": [1.0, 2.0, 3.0],
        }
    )
    for diagnostic in diagnostics:
        with self.assertRaisesRegex(TypeError, "weights \\(w\\) must be a number*"):
            diagnostic(non_numeric_first)

    # A DataFrame with zero columns should fail fast with a clear message.
    no_columns = pd.DataFrame(index=[0, 1])
    for diagnostic in diagnostics:
        with self.assertRaisesRegex(
            TypeError,
            "weights \\(w\\) DataFrame must include at least one column.",
        ):
            diagnostic(no_columns)

def test_weights_diagnostics_accept_dataframe_input(self) -> None:
    """DataFrame input should behave as if only its first column was passed."""
    from balance.stats_and_plots.weights_stats import (
        design_effect,
        nonparametric_skew,
        prop_above_and_below,
        weighted_median_breakdown_point,
    )

    weights = pd.Series((1.0, 2.0, 3.0, 4.0), name="weights")
    frame = pd.DataFrame(
        {
            "weights": weights,
            # Diagnostics are expected to ignore every column after the first.
            "other": (100.0, 100.0, 100.0, 100.0),
        }
    )

    for diagnostic in (
        design_effect,
        nonparametric_skew,
        weighted_median_breakdown_point,
    ):
        self.assertEqual(diagnostic(frame), diagnostic(weights))

    frame_result = prop_above_and_below(frame)
    series_result = prop_above_and_below(weights)
    self.assertIsNotNone(frame_result)
    self.assertIsNotNone(series_result)
    pd.testing.assert_series_equal(
        _assert_type(frame_result, pd.Series),
        _assert_type(series_result, pd.Series),
    )


class TestImpactOfWeightsOnOutcome(
balance.testutil.BalanceTestCase,
Expand Down Expand Up @@ -927,6 +1055,18 @@ def test_weighted_median_breakdown_point(self) -> None:
weighted_median_breakdown_point(pd.Series((1, 1, 1, 1, 10))), 0.2
)

def test_weighted_median_breakdown_point_returns_np_float64(self) -> None:
    """The breakdown point should be np.float64 for every input pattern."""
    from balance.stats_and_plots.weights_stats import (
        weighted_median_breakdown_point,
    )

    # Cover both a spread-out case and one dominated by a majority weight.
    for weights in (pd.Series([1, 1, 1, 10]), pd.Series([60, 20, 20])):
        self.assertIsInstance(weighted_median_breakdown_point(weights), np.float64)

def test_design_effect_with_two_values(self) -> None:
"""Test design effect with two weight values.

Expand Down
Loading