Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/spatialdata/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,21 @@ def _validate_table_annotation_metadata(self, data: AnnData) -> None:
if len(set(expected_regions).symmetric_difference(set(found_regions))) > 0:
raise ValueError(f"Regions in the AnnData object and `{attr[self.REGION_KEY_KEY]}` do not match.")

# Warning for object/string columns with NaN in region_key or instance_key
instance_key = attr[self.INSTANCE_KEY]
region_key = attr[self.REGION_KEY_KEY]
for key_name, key_value in [("region_key", region_key), ("instance_key", instance_key)]:
if key_value in data.obs:
col = data.obs[key_value]
col_dtype = col.dtype
if (col_dtype == "object" or pd.api.types.is_string_dtype(col_dtype)) and col.isna().any():
logger.warning(
f"The {key_name} column '{key_value}' is of {col_dtype} type and contains NaN values. "
"After writing and reading with AnnData, NaN values may (depending on the AnnData version) "
"be converted to strings. This may cause issues when matching instances across read/write "
"cycles."
)

def validate(
self,
data: AnnData,
Expand Down
42 changes: 42 additions & 0 deletions tests/io/test_readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,3 +1065,45 @@ def test_read_sdata(tmp_path: Path, points: SpatialData) -> None:
assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_str)
assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_upath)
assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_zarr_group)


def test_sdata_with_nan_in_obs() -> None:
    """Check that a table whose ``obs`` mixes real values with NaN can be written and read back.

    Regression test for https://github.com/scverse/spatialdata/issues/399
    Previously the write raised ``TypeError: expected unicode string, found nan``.
    Now the write succeeds. The numeric column keeps its NaN after the round-trip.
    In the string/object column the missing entry may, depending on the AnnData
    version, come back either as a missing value or as the string ``"nan"``;
    both outcomes are accepted here.
    """
    from spatialdata.models import TableModel

    obs = pd.DataFrame(
        {
            "region": ["region1", "region2"],
            "instance": [0, 0],
            "column_only_region1": ["string", np.nan],
            "column_only_region2": [np.nan, 3],
        }
    )
    table = TableModel.parse(
        AnnData(obs=obs),
        region_key="region",
        instance_key="instance",
        region=["region1", "region2"],
    )
    sdata = SpatialData(tables={"table": table})

    # Use `pd.isna` instead of `is np.nan`: an identity check only passes when
    # pandas hands back the very same float object, which is not guaranteed.
    assert pd.isna(sdata["table"].obs["column_only_region1"].iloc[1])
    assert np.isnan(sdata["table"].obs["column_only_region2"].iloc[0])

    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "data.zarr")
        sdata.write(path)

        sdata2 = SpatialData.read(path)
        obs_after = sdata2["table"].obs
        assert "column_only_region1" in obs_after.columns
        assert obs_after["column_only_region1"].iloc[0] == "string"
        assert obs_after["column_only_region2"].iloc[1] == 3

        # The missing entry of the string/object column is either preserved as
        # a missing value or stringified to "nan", depending on the AnnData version.
        missing_after = obs_after["column_only_region1"].iloc[1]
        assert pd.isna(missing_after) or missing_after == "nan"
        assert np.isnan(obs_after["column_only_region2"].iloc[0])
Loading