|
2 | 2 | from typing import Any, Literal, TypeAlias |
3 | 3 |
|
4 | 4 | import numpy as np |
| 5 | +import pandas as pd |
| 6 | +from anndata import AnnData |
| 7 | +from pandas.api.types import is_string_dtype |
5 | 8 | from spatialdata import SpatialData, read_zarr |
6 | 9 | from spatialdata.models import get_model |
7 | 10 | from xarray import DataArray, DataTree |
@@ -68,6 +71,32 @@ def _make_key_lookup(sdata: SpatialData) -> dict: |
68 | 71 | return dict_lookup |
69 | 72 |
|
70 | 73 |
|
| 74 | +def _normalize_dataframe_strings(df: pd.DataFrame) -> None: |
| 75 | + """Normalize string dtypes to object to avoid nullable string serialization issues.""" |
| 76 | + if is_string_dtype(df.index.dtype): |
| 77 | + df.index = df.index.astype(object) |
| 78 | + if df.index.isna().any(): |
| 79 | + df.index = df.index.where(~df.index.isna(), None) |
| 80 | + |
| 81 | + string_cols = df.select_dtypes(include=["string"]).columns |
| 82 | + if len(string_cols) > 0: |
| 83 | + df[string_cols] = df[string_cols].astype(object) |
| 84 | + for col in string_cols: |
| 85 | + if df[col].isna().any(): |
| 86 | + df[col] = df[col].where(df[col].notna(), None) |
| 87 | + |
| 88 | + cat_cols = df.select_dtypes(include=["category"]).columns |
| 89 | + for col in cat_cols: |
| 90 | + if is_string_dtype(df[col].cat.categories.dtype): |
| 91 | + df[col] = df[col].cat.set_categories(df[col].cat.categories.astype(object)) |
| 92 | + |
| 93 | + |
def _normalize_anndata_strings(adata: AnnData) -> None:
    """Apply ``_normalize_dataframe_strings`` to ``adata.obs`` and ``adata.var``.

    ``obs`` is processed first, then ``var``. Nothing is returned; the helper
    is called with the dataframes exposed by ``adata``.

    Parameters
    ----------
    adata
        The AnnData object whose ``obs`` and ``var`` tables are normalized.
    """
    # Resolve each attribute lazily so `var` is only read after `obs` is done.
    for table_name in ("obs", "var"):
        _normalize_dataframe_strings(getattr(adata, table_name))
| 98 | + |
| 99 | + |
71 | 100 | def _force_delete_object(sdata: SpatialData, name: str) -> None: |
72 | 101 | """Force delete an object from the SpatialData object and directory. |
73 | 102 |
|
@@ -110,6 +139,9 @@ def add_element_sdata(sdata: SpatialData, element: Any, element_name: str, overw |
110 | 139 |
|
111 | 140 | _force_delete_object(sdata, element_name) |
112 | 141 |
|
| 142 | + if isinstance(element, AnnData): |
| 143 | + _normalize_anndata_strings(element) |
| 144 | + |
113 | 145 | # the element needs to validate with exactly one of the models |
114 | 146 | get_model(element) |
115 | 147 |
|
|
0 commit comments