Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
5718cba
add basic support and remote SpatialData tests
berombau Jan 24, 2025
a8be620
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 24, 2025
ce686e4
fix pre-commit
LucaMarconato Jan 31, 2025
e7fa020
Revert "Update pyproject.toml"
LucaMarconato Jan 31, 2025
a9c0801
removed 3.13 from test ci
LucaMarconato Jan 31, 2025
5cfdaec
Merge branch 'no_python_313' into remote2
LucaMarconato Jan 31, 2025
4fe6a47
fix
LucaMarconato Jan 31, 2025
d97a1d2
uploading sdata to local s3 storage
LucaMarconato Jan 31, 2025
5e26b5e
add _open_zarr_store
berombau Jan 31, 2025
5794871
revert changing write function signature
berombau Jan 31, 2025
0207ff7
update _open_zarr_store with StoreLike
berombau Jan 31, 2025
7e497ff
read image element from base store
berombau Jan 31, 2025
c674281
clean up remote mock tests, focus only on reading raster elements
berombau Feb 1, 2025
fb953a0
improve remote http test, add alternative
berombau Feb 1, 2025
52bb5fc
add support for consolidated metadata store in util function, add _cr…
berombau Feb 1, 2025
ca82493
allow for groups as store input
berombau Feb 1, 2025
ecea0e6
handle consolidated metadata with upath
berombau Feb 1, 2025
734eb45
split remote reading tests between http and http with consolidated me…
berombau Feb 1, 2025
c0ffb1c
remove f_store_path, support remote raster types fully and keep local…
berombau Feb 1, 2025
d60bd85
Fix metadata_key bug now that store is not always FSStore. Add extra …
berombau Feb 1, 2025
c3fa8cf
add mypy fixes
berombau Feb 1, 2025
d16a638
Merge branch 'main' into remote2
ap-- Mar 17, 2025
23f4a89
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
a80588c
Fix linting errors
ap-- Mar 17, 2025
020810b
fixed majority of tests
ap-- Mar 17, 2025
ba25564
spatialdata._io._utils: _open_zarr_store has to set dimension_separat…
ap-- Mar 17, 2025
b2ff8f8
stay in sync with ome zarr format
ap-- Mar 17, 2025
70480ce
spatialdata._io.io_raster: support remote stores
ap-- Mar 17, 2025
10cef3f
prevent crashing tests on 3.10
ap-- Mar 17, 2025
d9e4eac
Merge branch 'spatial-data-crash310' into remote2
ap-- Mar 17, 2025
ac95ecf
add basic support and remote SpatialData tests
berombau Jan 24, 2025
45375fa
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 24, 2025
ab1b0f2
fix pre-commit
LucaMarconato Jan 31, 2025
c88b3a5
Revert "Update pyproject.toml"
LucaMarconato Jan 31, 2025
5d016ed
fix
LucaMarconato Jan 31, 2025
3549e16
uploading sdata to local s3 storage
LucaMarconato Jan 31, 2025
71f8a8b
add _open_zarr_store
berombau Jan 31, 2025
7f358dc
revert changing write function signature
berombau Jan 31, 2025
9bcdd34
update _open_zarr_store with StoreLike
berombau Jan 31, 2025
8228043
read image element from base store
berombau Jan 31, 2025
f25b6ce
clean up remote mock tests, focus only on reading raster elements
berombau Feb 1, 2025
7561dc1
improve remote http test, add alternative
berombau Feb 1, 2025
469f171
add support for consolidated metadata store in util function, add _cr…
berombau Feb 1, 2025
ce76527
allow for groups as store input
berombau Feb 1, 2025
df22639
handle consolidated metadata with upath
berombau Feb 1, 2025
dadc489
split remote reading tests between http and http with consolidated me…
berombau Feb 1, 2025
cfb03d5
remove f_store_path, support remote raster types fully and keep local…
berombau Feb 1, 2025
813ff90
Fix metadata_key bug now that store is not always FSStore. Add extra …
berombau Feb 1, 2025
94eb25f
add mypy fixes
berombau Feb 1, 2025
7ea6da2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
fc532a3
Fix linting errors
ap-- Mar 17, 2025
46319fa
fixed majority of tests
ap-- Mar 17, 2025
075877c
spatialdata._io._utils: _open_zarr_store has to set dimension_separat…
ap-- Mar 17, 2025
28837b1
stay in sync with ome zarr format
ap-- Mar 17, 2025
1df5be6
spatialdata._io.io_raster: support remote stores
ap-- Mar 17, 2025
3438bda
prevent crashing tests on 3.10
ap-- Mar 17, 2025
dd4e29d
merge
melonora Aug 20, 2025
6f64ced
correct pyproject.toml
melonora Aug 20, 2025
008c0e4
remove type checking
melonora Aug 20, 2025
5eacf56
use sanitized _store
melonora Aug 20, 2025
12a47e3
Cloud fix remote embl datasets (#904)
ap-- Aug 20, 2025
c76f8c4
spatialdata.io.io_shapes: fix support for remote shapes (#902)
ap-- Aug 20, 2025
c11752d
spatialdata.io.io_points: fix support for remote points (#903)
ap-- Aug 20, 2025
f0a0c04
spatialdata._io: test remote tables support and fix repr (#905)
ap-- Aug 20, 2025
a0ec5be
small refactor
melonora Aug 20, 2025
c514a0b
add remote write support + cleanup (#973)
melonora Aug 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ dependencies = [
"dask-image",
"dask>=2024.4.1,<=2024.11.2",
"datashader",
"fsspec",
"fsspec[s3,http]",
"geopandas>=0.14",
"multiscale_spatial_image>=2.0.3",
"networkx",
"numba>=0.55.0",
"numpy",
"ome_zarr>=0.8.4",
"universal_pathlib>=0.2.6",
"pandas",
"pooch",
"pyarrow",
Expand All @@ -59,6 +60,7 @@ test = [
"pytest-cov",
"pytest-mock",
"torch",
"moto[s3,server]"
]
docs = [
"sphinx>=4.5",
Expand Down
10 changes: 1 addition & 9 deletions src/spatialdata/_core/query/relational_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def _call_join(
return elements_dict, table


def match_table_to_element(sdata: SpatialData, element_name: str, table_name: str = "table") -> AnnData:
def match_table_to_element(sdata: SpatialData, element_name: str, table_name: str) -> AnnData:
"""
Filter the table and reorder its rows to match the instances (rows/labels) of the specified SpatialElement.

Expand All @@ -738,14 +738,6 @@ def match_table_to_element(sdata: SpatialData, element_name: str, table_name: st
match_element_to_table : Function to match a spatial element to a table.
join_spatialelement_table : General function, to join spatial elements with a table with more control.
"""
if table_name is None:
warnings.warn(
"Assumption of table with name `table` being present is being deprecated in SpatialData v0.1. "
"Please provide the name of the table as argument to table_name.",
DeprecationWarning,
stacklevel=2,
)
table_name = "table"
_, table = join_spatialelement_table(
sdata=sdata, spatial_element_names=element_name, table_name=table_name, how="left", match_rows="left"
)
Expand Down
164 changes: 62 additions & 102 deletions src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from dask.delayed import Delayed
from geopandas import GeoDataFrame
from ome_zarr.io import parse_url
from ome_zarr.types import JSONDict
from shapely import MultiPolygon, Polygon
from xarray import DataArray, DataTree

Expand All @@ -30,11 +29,8 @@
validate_table_attr_keys,
)
from spatialdata._logging import logger
from spatialdata._types import ArrayLike, Raster_T
from spatialdata._utils import (
_deprecation_alias,
_error_message_add_element,
)
from spatialdata._types import ArrayLike, Raster_T, StoreLike
from spatialdata._utils import _deprecation_alias
from spatialdata.models import (
Image2DModel,
Image3DModel,
Expand Down Expand Up @@ -601,7 +597,7 @@ def path(self, value: Path | None) -> None:
)

def _get_groups_for_element(
self, zarr_path: Path, element_type: str, element_name: str
self, zarr_path: StoreLike, element_type: str, element_name: str
) -> tuple[zarr.Group, zarr.Group, zarr.Group]:
"""
Get the Zarr groups for the root, element_type and element for a specific element.
Expand All @@ -621,9 +617,9 @@ def _get_groups_for_element(
-------
either the existing Zarr subgroup or a new one.
"""
if not isinstance(zarr_path, Path):
raise ValueError("zarr_path should be a Path object")
store = parse_url(zarr_path, mode="r+").store
from spatialdata._io._utils import _open_zarr_store

store = _open_zarr_store(zarr_path, mode="r+")
root = zarr.group(store=store)
if element_type not in ["images", "labels", "points", "polygons", "shapes", "tables"]:
raise ValueError(f"Unknown element type {element_type}")
Expand Down Expand Up @@ -1068,9 +1064,12 @@ def elements_paths_on_disk(self) -> list[str]:
-------
A list of paths of the elements saved in the Zarr store.
"""
from spatialdata._io._utils import _open_zarr_store

if self.path is None:
raise ValueError("The SpatialData object is not backed by a Zarr store.")
store = parse_url(self.path, mode="r").store

store = _open_zarr_store(self.path)
root = zarr.group(store=store)
elements_in_zarr = []

Expand Down Expand Up @@ -1205,12 +1204,16 @@ def write(
:class:`~spatialdata._io.format.CurrentRasterFormat`, :class:`~spatialdata._io.format.CurrentShapesFormat`,
:class:`~spatialdata._io.format.CurrentPointsFormat`, :class:`~spatialdata._io.format.CurrentTablesFormat`.
"""
from spatialdata._io._utils import _open_zarr_store

if isinstance(file_path, str):
file_path = Path(file_path)
self._validate_can_safely_write_to_path(file_path, overwrite=overwrite)
self._validate_all_elements()
if isinstance(file_path, Path):
# TODO: also validate remote paths
self._validate_can_safely_write_to_path(file_path, overwrite=overwrite)
self._validate_all_elements()

store = parse_url(file_path, mode="w").store
store = _open_zarr_store(file_path, mode="w")
zarr_group = zarr.group(store=store, overwrite=overwrite)
self.write_attrs(zarr_group=zarr_group)
store.close()
Expand All @@ -1236,20 +1239,22 @@ def write(
def _write_element(
self,
element: SpatialElement | AnnData,
zarr_container_path: Path,
zarr_container_path: StoreLike,
element_type: str,
element_name: str,
overwrite: bool,
format: SpatialDataFormat | list[SpatialDataFormat] | None = None,
) -> None:
if not isinstance(zarr_container_path, Path):
if not isinstance(zarr_container_path, StoreLike):
raise ValueError(
f"zarr_container_path must be a Path object, type(zarr_container_path) = {type(zarr_container_path)}."
f"zarr_container_path must be a 'StoreLike' object "
f"(str | Path | UPath | zarr.storage.StoreLike | zarr.Group), got: {type(zarr_container_path)}."
)
if isinstance(zarr_container_path, Path):
file_path_of_element = zarr_container_path / element_type / element_name
self._validate_can_safely_write_to_path(
file_path=file_path_of_element, overwrite=overwrite, saving_an_element=True
)
file_path_of_element = zarr_container_path / element_type / element_name
self._validate_can_safely_write_to_path(
file_path=file_path_of_element, overwrite=overwrite, saving_an_element=True
)

root_group, element_type_group, _ = self._get_groups_for_element(
zarr_path=zarr_container_path, element_type=element_type, element_name=element_name
Expand All @@ -1259,14 +1264,27 @@ def _write_element(

parsed = _parse_formats(formats=format)

# We pass zarr_container_path along so that paths are built correctly when writing to a remote system, even on Windows.
if element_type == "images":
write_image(image=element, group=element_type_group, name=element_name, format=parsed["raster"])
elif element_type == "labels":
write_labels(labels=element, group=root_group, name=element_name, format=parsed["raster"])
elif element_type == "points":
write_points(points=element, group=element_type_group, name=element_name, format=parsed["points"])
write_points(
points=element,
group=element_type_group,
name=element_name,
zarr_container_path=zarr_container_path,
format=parsed["points"],
)
elif element_type == "shapes":
write_shapes(shapes=element, group=element_type_group, name=element_name, format=parsed["shapes"])
write_shapes(
shapes=element,
group=element_type_group,
name=element_name,
zarr_container_path=zarr_container_path,
format=parsed["shapes"],
)
elif element_type == "tables":
write_table(table=element, group=element_type_group, name=element_name, format=parsed["tables"])
else:
Expand Down Expand Up @@ -1376,7 +1394,7 @@ def delete_element_from_disk(self, element_name: str | list[str]) -> None:
self.delete_element_from_disk(name)
return

from spatialdata._io._utils import _backed_elements_contained_in_path
from spatialdata._io._utils import _backed_elements_contained_in_path, _open_zarr_store

if self.path is None:
raise ValueError("The SpatialData object is not backed by a Zarr store.")
Expand Down Expand Up @@ -1417,7 +1435,7 @@ def delete_element_from_disk(self, element_name: str | list[str]) -> None:
)

# delete the element
store = parse_url(self.path, mode="r+").store
store = _open_zarr_store(self.path)
root = zarr.group(store=store)
root[element_type].pop(element_name)
store.close()
Expand All @@ -1438,15 +1456,24 @@ def _check_element_not_on_disk_with_different_type(self, element_type: str, elem
)

def write_consolidated_metadata(self) -> None:
store = parse_url(self.path, mode="r+").store
# consolidate metadata to more easily support remote reading bug in zarr. In reality, 'zmetadata' is written
# instead of '.zmetadata' see discussion https://github.com/zarr-developers/zarr-python/issues/1121
zarr.consolidate_metadata(store, metadata_key=".zmetadata")
from spatialdata._io._utils import _open_zarr_store

store = _open_zarr_store(self.path)
# Note that the store can be local (which does not have the zmetadata bug)
# or a remote FSStore (which has the bug).
# Consolidate the metadata to more easily support remote reading, working around a bug in zarr.
# We write 'zmetadata' instead of the standard '.zmetadata' to avoid the FSStore bug.
# See discussion https://github.com/zarr-developers/zarr-python/issues/1121
zarr.consolidate_metadata(store, metadata_key="zmetadata")
store.close()

def has_consolidated_metadata(self) -> bool:
from spatialdata._io._utils import _open_zarr_store

return_value = False
store = parse_url(self.path, mode="r").store
store = _open_zarr_store(self.path)
# Note that the store can be local (which does not have the zmetadata bug)
# or a remote FSStore (which has the bug).
if "zmetadata" in store:
return_value = True
store.close()
Expand Down Expand Up @@ -1575,15 +1602,11 @@ def write_transformations(self, element_name: str | None = None) -> None:
)
axes = get_axes_names(element)
if isinstance(element, DataArray | DataTree):
from spatialdata._io._utils import (
overwrite_coordinate_transformations_raster,
)
from spatialdata._io._utils import overwrite_coordinate_transformations_raster

overwrite_coordinate_transformations_raster(group=element_group, axes=axes, transformations=transformations)
elif isinstance(element, DaskDataFrame | GeoDataFrame | AnnData):
from spatialdata._io._utils import (
overwrite_coordinate_transformations_non_raster,
)
from spatialdata._io._utils import overwrite_coordinate_transformations_non_raster

overwrite_coordinate_transformations_non_raster(
group=element_group, axes=axes, transformations=transformations
Expand Down Expand Up @@ -1792,41 +1815,16 @@ def table(self) -> None | AnnData:
-------
The table.
"""
warnings.warn(
"Table accessor will be deprecated with SpatialData version 0.1, use sdata.tables instead.",
DeprecationWarning,
stacklevel=2,
)
# Isinstance will still return table if anndata has 0 rows.
if isinstance(self.tables.get("table"), AnnData):
return self.tables["table"]
return None
raise AttributeError("The property 'table' is deprecated. use '.tables' instead.")

@table.setter
def table(self, table: AnnData) -> None:
warnings.warn(
"Table setter will be deprecated with SpatialData version 0.1, use tables instead.",
DeprecationWarning,
stacklevel=2,
)
TableModel().validate(table)
if self.tables.get("table") is not None:
raise ValueError("The table already exists. Use del sdata.tables['table'] to remove it first.")
self.tables["table"] = table
raise AttributeError("The property 'table' is deprecated. use '.tables' instead.")

@table.deleter
def table(self) -> None:
"""Delete the table."""
warnings.warn(
"del sdata.table will be deprecated with SpatialData version 0.1, use del sdata.tables['table'] instead.",
DeprecationWarning,
stacklevel=2,
)
if self.tables.get("table"):
del self.tables["table"]
else:
# More informative than the error in the zarr library.
raise KeyError("table with name 'table' not present in the SpatialData object.")
raise AttributeError("The property 'table' is deprecated. use '.tables' instead.")

@staticmethod
def read(file_path: Path | str, selection: tuple[str] | None = None) -> SpatialData:
Expand All @@ -1848,44 +1846,6 @@ def read(file_path: Path | str, selection: tuple[str] | None = None) -> SpatialD

return read_zarr(file_path, selection=selection)

def add_image(
self,
name: str,
image: DataArray | DataTree,
storage_options: JSONDict | list[JSONDict] | None = None,
overwrite: bool = False,
) -> None:
"""Deprecated. Use `sdata[name] = image` instead.""" # noqa: D401
_error_message_add_element()

def add_labels(
self,
name: str,
labels: DataArray | DataTree,
storage_options: JSONDict | list[JSONDict] | None = None,
overwrite: bool = False,
) -> None:
"""Deprecated. Use `sdata[name] = labels` instead.""" # noqa: D401
_error_message_add_element()

def add_points(
self,
name: str,
points: DaskDataFrame,
overwrite: bool = False,
) -> None:
"""Deprecated. Use `sdata[name] = points` instead.""" # noqa: D401
_error_message_add_element()

def add_shapes(
self,
name: str,
shapes: GeoDataFrame,
overwrite: bool = False,
) -> None:
"""Deprecated. Use `sdata[name] = shapes` instead.""" # noqa: D401
_error_message_add_element()

@property
def images(self) -> Images:
"""Return images as a Dict of name to image data."""
Expand Down
Loading
Loading