|
| 1 | +"""Benchmarks for SpatialData IO operations. |
| 2 | +
|
| 3 | +Configuration: |
| 4 | + Edit SANDBOX_DIR and DATASET below to point to your data. |
| 5 | +
|
| 6 | +Setup: |
| 7 | + cd <SANDBOX_DIR>/<DATASET> |
| 8 | + python download.py # use the same env where spatialdata is installed |
| 9 | +
|
| 10 | +Running: |
| 11 | + cd /path/to/spatialdata-io |
| 12 | +
|
| 13 | + # Quick benchmark (single run, for testing): |
| 14 | + asv run --python=same -b IOBenchmark --quick --show-stderr -v |
| 15 | +
|
| 16 | + # Full benchmark (multiple runs, for accurate results): |
| 17 | + asv run --python=same -b IOBenchmark --show-stderr -v |
| 18 | +
|
| 19 | +Comparing branches: |
| 20 | + # Run on specific commits: |
| 21 | + asv run main^! -b IOBenchmark --show-stderr -v |
| 22 | + asv run xenium-labels-dask^! -b IOBenchmark --show-stderr -v |
| 23 | +
|
| 24 | + # Or compare two branches directly: |
| 25 | + asv continuous main xenium-labels-dask -b IOBenchmark --show-stderr -v |
| 26 | +
|
| 27 | + # View comparison: |
| 28 | + asv compare main xenium-labels-dask |
| 29 | +
|
| 30 | +Results: |
| 31 | + - Console output shows timing and memory after each run |
| 32 | + - JSON results saved to: .asv/results/ |
| 33 | + - Generate HTML report: asv publish && asv preview |
| 34 | +""" |
| 35 | + |
| 36 | +import inspect |
| 37 | +import shutil |
| 38 | +from pathlib import Path |
| 39 | +from typing import TYPE_CHECKING |
| 40 | + |
| 41 | +from spatialdata import SpatialData |
| 42 | + |
| 43 | +from spatialdata_io import xenium # type: ignore[attr-defined] |
| 44 | + |
# -----------------------------------------------------------------------------
# CONFIGURATION - adjust the two values below to match your local setup
# -----------------------------------------------------------------------------
# Root directory holding the benchmark datasets: a "spatialdata-sandbox"
# directory under the third-level parent directory of this file.
SANDBOX_DIR = Path(__file__).parent.parent.parent.joinpath("spatialdata-sandbox")
# Name of the dataset sub-directory inside SANDBOX_DIR.
DATASET = "xenium_2.0.0_io"
# -----------------------------------------------------------------------------
| 51 | + |
| 52 | + |
def get_paths() -> tuple[Path, Path]:
    """Return the input and output locations used by the benchmarks.

    Both paths live under ``SANDBOX_DIR / DATASET``: the raw input in
    ``data`` and the benchmark output in ``data_benchmark.zarr``.

    Returns
    -------
    A ``(path_read, path_write)`` tuple. Only ``path_read`` is checked for
    existence; ``path_write`` may or may not exist yet.

    Raises
    ------
    ValueError
        If the input data directory does not exist.
    """
    dataset_root = SANDBOX_DIR / DATASET
    source_dir = dataset_root / "data"

    if source_dir.exists():
        return source_dir, dataset_root / "data_benchmark.zarr"

    raise ValueError(f"Data directory not found: {source_dir}")
| 63 | + |
| 64 | + |
class IOBenchmark:
    """Benchmark reading a Xenium dataset and writing it to ``path_write``.

    Each benchmark method performs one full cycle: parse the raw data with
    ``xenium`` and write the resulting ``SpatialData`` object to disk.
    """

    # Benchmark settings, set as class attributes for the asv runner
    # (see the asv documentation for their exact meaning).
    timeout = 3600
    repeat = 3
    number = 1
    warmup_time = 0
    processes = 1

    def setup(self) -> None:
        """Resolve the data paths and delete any output left by an earlier run."""
        source, target = get_paths()
        self.path_read = source
        self.path_write = target
        # Start from a clean slate: drop a pre-existing output store.
        if target.exists():
            shutil.rmtree(target)

    def _read_xenium(self) -> SpatialData:
        """Parse the Xenium data, adapting to the installed reader's signature."""
        reader_args = {
            "path": str(self.path_read),
            "n_jobs": 8,
            "cell_boundaries": True,
            "nucleus_boundaries": True,
            "morphology_focus": True,
            "cells_as_circles": True,
        }
        # Only pass ``cleanup_labels_zarr_tmpdir`` to reader versions that
        # accept it, so the same benchmark runs across commits.
        if "cleanup_labels_zarr_tmpdir" in inspect.signature(xenium).parameters:
            reader_args["cleanup_labels_zarr_tmpdir"] = False

        return xenium(**reader_args)

    def _read_and_write(self) -> None:
        """Run one read + write cycle; shared by the benchmark methods."""
        self._read_xenium().write(self.path_write)

    def time_io(self) -> None:
        """Walltime for parsing the data and writing it to disk."""
        self._read_and_write()

    def peakmem_io(self) -> None:
        """Peak memory for parsing the data and writing it to disk."""
        self._read_and_write()
| 106 | + |
| 107 | + |
if __name__ == "__main__":
    # Run a single read + write cycle directly, without the asv runner.
    # setup() must be called first: it assigns path_read/path_write (which
    # time_io() relies on) and removes any output left by a previous run.
    benchmark = IOBenchmark()
    benchmark.setup()
    benchmark.time_io()
0 commit comments