Skip to content

BUG: AttributeError in pandas.core.algorithms.diff when passing non-numeric types #61729

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,9 +1334,15 @@ def diff(arr, n: int, axis: AxisInt = 0):
# added a check on the integer value of period
# see https://github.com/pandas-dev/pandas/issues/56607
if not lib.is_integer(n):
if not (is_float(n) and n.is_integer()):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand how this happens. `n.is_integer()` should only be called when `is_float(n)` is true, which should be equivalent to `isinstance(n, (float, np.float_))`, so the method should always exist.

raise ValueError("periods must be an integer")
n = int(n)
try:
if is_float(n) and n.is_integer():
n = int(n)
else:
raise ValueError("periods must be an integer")
except (AttributeError, TypeError) as err:
# Handle cases where n doesn't have is_integer method
# or other type-related errors
raise ValueError("periods must be an integer") from err
na = np.nan
dtype = arr.dtype

Expand Down
56 changes: 56 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,62 @@ def test_diff_low_precision_int(self, dtype):
expected = np.array([np.nan, 1, 0, -1, 0], dtype="float32")
tm.assert_numpy_array_equal(result, expected)

def test_diff_invalid_type_handling(self):
    """Check that non-numeric ``periods`` values raise ValueError."""
    # Regression guard: the accompanying report describes these inputs as
    # producing an AttributeError rather than a ValueError.
    values = np.array([1, 2, 3, 4, 5])
    msg = "periods must be an integer"

    # One representative per non-numeric kind: str, None, list, dict and a
    # bare object.
    bad_periods = ("hello", None, [1, 2], {"key": "value"}, object())

    for periods in bad_periods:
        with pytest.raises(ValueError, match=msg):
            algos.diff(values, periods)

def test_diff_valid_float_handling(self):
    """Check that integer-valued float ``periods`` act like the matching int.

    Every float below is a whole number, so ``algos.diff`` is expected to
    accept it and return exactly what it returns for ``int(periods)``.
    """
    arr = np.array([1, 2, 3, 4, 5])

    # Whole-number floats, covering positive and negative periods.
    valid_inputs = [1.0, 2.0, -1.0]

    for valid_input in valid_inputs:
        result = algos.diff(arr, valid_input)
        # Comparing with the integer call pins the values as well as the
        # shape; a shape-only check would still pass if the wrong period
        # had been applied.
        expected = algos.diff(arr, int(valid_input))
        tm.assert_numpy_array_equal(result, expected)

def test_diff_invalid_float_handling(self):
    """Check that fractional float ``periods`` values raise ValueError."""
    values = np.array([1, 2, 3, 4, 5])
    msg = "periods must be an integer"

    # Floats with a fractional part, both positive and negative.
    fractional_periods = (1.5, 2.7, -1.3)

    for periods in fractional_periods:
        with pytest.raises(ValueError, match=msg):
            algos.diff(values, periods)


@pytest.mark.parametrize("op", [np.array, pd.array])
def test_union_with_duplicates(op):
Expand Down
Loading