|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 | 3 | import os
|
| 4 | +import shutil |
4 | 5 | from abc import ABC, abstractmethod
|
5 | 6 | from collections.abc import Callable, Sequence
|
6 | 7 | from dataclasses import dataclass, field
|
|
9 | 10 | from typing import Any, TypeAlias, Union
|
10 | 11 |
|
11 | 12 | import anndata
|
| 13 | +import spatialdata as sd |
12 | 14 | from anndata import AnnData
|
13 | 15 | from scanpy import logging as logg
|
14 | 16 | from scanpy import read
|
15 | 17 | from scanpy._utils import check_presence_download
|
16 | 18 |
|
# Anything accepted as a filesystem path: a plain `str` or an `os.PathLike[str]`.
17 | 19 | PathLike: TypeAlias = os.PathLike[str] | str
|
# A callable taking arbitrary arguments and returning an `AnnData` or any other object.
18 | 20 | Function_t: TypeAlias = Callable[..., AnnData | Any]
|
# `~/.cache/squidpy` under the current user's home directory.
# NOTE(review): presumably the default location for downloaded datasets — confirm against callers.
| 21 | +DEFAULT_CACHE_DIR = Path.home() / ".cache" / "squidpy" |
19 | 22 |
|
20 | 23 |
|
21 | 24 | @dataclass(frozen=True)
|
@@ -177,3 +180,42 @@ def _download(self, fpath: PathLike, backup_url: str, **kwargs: Any) -> Any:
|
# Read-only property returning the constant file suffix ".tiff".
# NOTE(review): the enclosing class is outside this view; presumably the suffix
# is appended to the file name of the downloaded image — confirm against the class.
177 | 180 | @property
|
178 | 181 | def _extension(self) -> str:
|
179 | 182 | return ".tiff"
|
| 183 | + |
| 184 | + |
def _get_zipped_dataset(folderpath: Path, dataset_name: str, figshare_id: str) -> sd.SpatialData:
    """Return a zipped Figshare dataset as a SpatialData object, fetching it if needed.

    The dataset is looked up as ``<folderpath>/<dataset_name>.zarr``. If that
    store is missing, ``<folderpath>/<dataset_name>.zip`` is downloaded from
    Figshare (unless it is already on disk), unpacked into ``folderpath`` and
    then read.

    Parameters
    ----------
    folderpath
        Existing directory that holds the archive and the extracted store.
        A plain string is accepted as well and converted to a ``Path``.
    dataset_name
        Base name of the archive (``.zip``) and of the extracted store (``.zarr``).
    figshare_id
        Figshare file identifier appended to the download URL.

    Returns
    -------
    The object returned by ``sd.read_zarr`` for the extracted store.

    Raises
    ------
    ValueError
        If ``folderpath`` is not an existing directory.
    RuntimeError
        If the download or the extraction fails, or if the archive does not
        produce ``<dataset_name>.zarr`` inside ``folderpath``.
    """
    # Accept str as well as Path so the `.is_dir()` / `/` operations below never
    # fail with an AttributeError/TypeError on a perfectly valid path string.
    folderpath = Path(folderpath)
    if not folderpath.is_dir():
        raise ValueError(f"Expected a directory path for `folderpath`, found: {folderpath}")

    download_zip = folderpath / f"{dataset_name}.zip"
    extracted_path = folderpath / f"{dataset_name}.zarr"

    # Return early if data is already extracted
    if extracted_path.exists():
        logg.info(f"Loading existing dataset from {extracted_path}")
        return sd.read_zarr(extracted_path)

    # Download if necessary
    if not download_zip.exists():
        logg.info(f"Downloading dataset `{dataset_name}` to {download_zip}")
        try:
            check_presence_download(
                filename=download_zip,
                backup_url=f"https://ndownloader.figshare.com/files/{figshare_id}",
            )
        except Exception as e:
            raise RuntimeError(f"Failed to download dataset: {e}") from e

    # The store is known to be absent here (see the early return above), so
    # extraction is unconditional.
    logg.info(f"Extracting dataset from {download_zip} to {extracted_path}")
    try:
        shutil.unpack_archive(str(download_zip), folderpath)
    except Exception as e:
        # A failed unpack can leave a partially written store behind. Remove it
        # so the next call does not take the early-return path and load a
        # truncated dataset.
        shutil.rmtree(extracted_path, ignore_errors=True)
        raise RuntimeError(f"Failed to extract dataset: {e}") from e

    # The archive is expected to contain a top-level `<dataset_name>.zarr` entry.
    if not extracted_path.exists():
        raise RuntimeError(f"Expected extracted data at {extracted_path}, but not found")

    return sd.read_zarr(extracted_path)
0 commit comments