Skip to content

Commit 28d05a2

Browse files
Merge pull request #337 from marcovarrone/xenium-labels-dask
Load Xenium mask labels using Dask
2 parents 5f829f5 + ada3cd3 commit 28d05a2

File tree

4 files changed

+243
-83
lines changed

4 files changed

+243
-83
lines changed

asv.conf.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"version": 1,
3+
"project": "spatialdata-io",
4+
"project_url": "https://github.com/scverse/spatialdata-io",
5+
"repo": ".",
6+
"branches": ["main", "xenium-labels-dask", "xenium-labels-dask-zipstore"],
7+
"dvcs": "git",
8+
"environment_type": "virtualenv",
9+
"pythons": ["3.12"],
10+
"build_command": [],
11+
"install_command": ["python -m pip install {build_dir}[test]"],
12+
"uninstall_command": ["python -m pip uninstall -y {project}"],
13+
"env_dir": ".asv/env",
14+
"results_dir": ".asv/results",
15+
"html_dir": ".asv/html",
16+
"benchmark_dir": "benchmarks",
17+
"hash_length": 8,
18+
"build_cache_size": 2,
19+
"install_timeout": 600,
20+
"repeat": 3,
21+
"processes": 1,
22+
"attribute_selection": ["time_*", "peakmem_*"]
23+
}

benchmarks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# ASV benchmarks for spatialdata-io

benchmarks/bench_xenium.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""Benchmarks for SpatialData IO operations.
2+
3+
Configuration:
4+
Edit SANDBOX_DIR and DATASET below to point to your data.
5+
6+
Setup:
7+
cd <SANDBOX_DIR>/<DATASET>
8+
python download.py # use the same env where spatialdata is installed
9+
10+
Running:
11+
cd /path/to/spatialdata-io
12+
13+
# Quick benchmark (single run, for testing):
14+
asv run --python=same -b IOBenchmark --quick --show-stderr -v
15+
16+
# Full benchmark (multiple runs, for accurate results):
17+
asv run --python=same -b IOBenchmark --show-stderr -v
18+
19+
Comparing branches:
20+
# Run on specific commits:
21+
asv run main^! -b IOBenchmark --show-stderr -v
22+
asv run xenium-labels-dask^! -b IOBenchmark --show-stderr -v
23+
24+
# Or compare two branches directly:
25+
asv continuous main xenium-labels-dask -b IOBenchmark --show-stderr -v
26+
27+
# View comparison:
28+
asv compare main xenium-labels-dask
29+
30+
Results:
31+
- Console output shows timing and memory after each run
32+
- JSON results saved to: .asv/results/
33+
- Generate HTML report: asv publish && asv preview
34+
"""
35+
36+
import inspect
37+
import shutil
38+
from pathlib import Path
39+
from typing import TYPE_CHECKING
40+
41+
from spatialdata import SpatialData
42+
43+
from spatialdata_io import xenium # type: ignore[attr-defined]
44+
45+
# =============================================================================
46+
# CONFIGURATION - Edit these paths to match your setup
47+
# =============================================================================
48+
SANDBOX_DIR = Path(__file__).parent.parent.parent / "spatialdata-sandbox"
49+
DATASET = "xenium_2.0.0_io"
50+
# =============================================================================
51+
52+
53+
def get_paths() -> tuple[Path, Path]:
    """Return the benchmark input directory and the output Zarr path.

    Both paths live under ``SANDBOX_DIR / DATASET``: the input is the
    ``data`` sub-directory and the output is ``data_benchmark.zarr``.

    Raises
    ------
    ValueError
        If the input data directory does not exist.
    """
    dataset_root = SANDBOX_DIR / DATASET
    source_dir = dataset_root / "data"

    # Only the input has to exist up front; the output path is just computed here.
    if source_dir.exists():
        return source_dir, dataset_root / "data_benchmark.zarr"

    raise ValueError(f"Data directory not found: {source_dir}")
63+
64+
65+
class IOBenchmark:
    """Benchmark reading a Xenium dataset and writing it back out.

    ``time_io`` and ``peakmem_io`` run the same read-then-write workload;
    the method-name prefix is what distinguishes the two benchmarks.
    """

    # Benchmark attributes read by the ASV runner.
    timeout = 3600
    repeat = 3
    number = 1
    warmup_time = 0
    processes = 1

    def setup(self) -> None:
        """Resolve the data paths and remove any output left by an earlier run."""
        source_dir, target_store = get_paths()
        self.path_read = source_dir
        self.path_write = target_store
        if target_store.exists():
            shutil.rmtree(target_store)

    def _read_xenium(self) -> SpatialData:
        """Call the ``xenium`` reader, passing only kwargs this version accepts."""
        # ``cleanup_labels_zarr_tmpdir`` is passed only when the installed reader
        # declares it, so the same benchmark runs on versions with and without it.
        reader_params = inspect.signature(xenium).parameters
        extra_kwargs = {"cleanup_labels_zarr_tmpdir": False} if "cleanup_labels_zarr_tmpdir" in reader_params else {}

        return xenium(
            path=str(self.path_read),
            n_jobs=8,
            cell_boundaries=True,
            nucleus_boundaries=True,
            morphology_focus=True,
            cells_as_circles=True,
            **extra_kwargs,
        )

    def time_io(self) -> None:
        """Walltime for parsing the dataset and writing it to ``path_write``."""
        self._read_xenium().write(self.path_write)

    def peakmem_io(self) -> None:
        """Peak memory for parsing the dataset and writing it to ``path_write``."""
        self._read_xenium().write(self.path_write)
106+
107+
108+
if __name__ == "__main__":
    # Manual run outside the ASV runner. setup() must be called explicitly:
    # it assigns path_read/path_write, and without it time_io() fails with
    # AttributeError on the first attribute access.
    benchmark = IOBenchmark()
    benchmark.setup()
    benchmark.time_io()

0 commit comments

Comments
 (0)