Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion hdf5storage/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,21 @@ def convert_to_numpy_str( # noqa: C901, PLR0911, PLR0912
raise TypeError(msg)


def deep_array_equal(a: Any, b: Any) -> bool:
"""
This is a more robust version of np.array_equal that works for object arrays
that contain ndarrays.
"""
if isinstance(a, np.ndarray) and isinstance(b, np.ndarray) and (a.dtype == object or b.dtype == object):
Copy link

Copilot AI Aug 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition should check that both arrays have object dtype, not just one of them. Using `or` could lead to incorrect comparisons when one array has object dtype and the other doesn't. Consider changing it to `a.dtype == object and b.dtype == object`.

Suggested change
if isinstance(a, np.ndarray) and isinstance(b, np.ndarray) and (a.dtype == object or b.dtype == object):
if isinstance(a, np.ndarray) and isinstance(b, np.ndarray) and (a.dtype == object and b.dtype == object):

Copilot uses AI. Check for mistakes.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe we want this, as the comparison can still be valid even if only one of the arrays has object dtype.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this shouldn't matter either way: if one array has object dtype and the other doesn't, the existing `np.array_equal` should do the right thing and return `False`.

if a.shape != b.shape:
return False

return all(deep_array_equal(x,y) for x,y in zip(a,b))

# fallback to normal if dtype != object
return np.array_equal(a,b)


def convert_to_numpy_bytes( # noqa: C901, PLR0911, PLR0912
data: str | bytes | bytearray | np.unsignedinteger | np.bytes_ | np.str_ | np.ndarray,
length: int | None = None,
Expand Down Expand Up @@ -1386,7 +1401,7 @@ def set_attributes_all(
if k not in existing:
attrs.create(k, val)
elif k == "MATLAB_fields":
if not np.array_equal(val, existing[k]):
if not deep_array_equal(val, existing[k]):
attrs.create(k, val)
else:
try:
Expand Down