Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion balance/utils/data_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def auto_aggregate(
if features is not None:
warnings.warn(
"features argument is unused, it will be removed in the future",
warnings.DeprecationWarning,
DeprecationWarning,
stacklevel=2,
)

Expand Down
48 changes: 48 additions & 0 deletions tests/test_util_data_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from __future__ import annotations

from unittest.mock import patch

import balance.testutil
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -206,6 +208,37 @@ def test_quantize_non_numeric_series_raises(self) -> None:
pd.Series(["x", "y", "z"]),
)

def test_quantize_non_dataframe_raises(self) -> None:
    """Check that quantize raises TypeError when pd.Series yields a foreign object.

    pd.Series is swapped (inside balance.utils.data_transformation) for a
    stand-in class whose "constructor" hands back an object that is neither
    a Series nor a DataFrame. Per the original test notes this targets
    line 162 of data_transformation.py, an edge case reachable only when
    pd.Series is monkey-patched or otherwise misbehaves.
    """

    class _Impostor:
        """Plain object that is neither a pandas Series nor a DataFrame."""

    class _ImpostorFactory(type):
        # Overriding __call__ on the metaclass makes "instantiating" the
        # stand-in class return an _Impostor, while the stand-in itself
        # remains a real type that isinstance() checks can accept.
        def __call__(cls, *args, **kwargs):
            return _Impostor()

    class _StandInSeries(metaclass=_ImpostorFactory):
        """Replacement installed in place of pd.Series for this test."""

    series_patch = patch(
        "balance.utils.data_transformation.pd.Series", _StandInSeries
    )
    # 123 is deliberately neither a Series nor a DataFrame.
    with series_patch, self.assertRaisesRegex(
        TypeError, "df must be a pandas DataFrame"
    ):
        balance_util.quantize(123)

def test_row_pairwise_diffs(self) -> None:
d = pd.DataFrame({"a": (1, 2, 3), "b": (-42, 8, 2)})
e = pd.DataFrame(
Expand Down Expand Up @@ -325,6 +358,21 @@ def test_auto_aggregate(self) -> None:
aggfunc="not_sum",
)

def test_auto_aggregate_features_deprecation(self) -> None:
    """Check that auto_aggregate emits a DeprecationWarning for ``features``.

    Supplying a non-None value for the deprecated ``features`` argument
    should trigger the warning. Per the original test notes this covers
    line 264 in data_transformation.py.
    """
    columns = {
        "x": [1, 2, 3, 4],
        "y": [1, 1, 1, np.nan],
        "id": [1, 1, 2, 3],
    }
    sample = pd.DataFrame(columns)
    deprecated_features = ["x", "y"]

    # Any non-None ``features`` value is expected to produce the warning.
    with self.assertWarns(DeprecationWarning):
        balance_util.auto_aggregate(sample, features=deprecated_features)

def test_fct_lump_basic_functionality(self) -> None:
"""Test basic functionality of fct_lump for category lumping.

Expand Down
Loading