Skip to content

Commit 7724871

Browse files
Merge pull request #264 from ddiez/fix_visium_hd
Fix visium_hd for empty dataset_id (default spaceranger output).
2 parents 625c77c + 70f5060 commit 7724871

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

src/spatialdata_io/readers/visium.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def visium(
3333
fullres_image_file: str | Path | None = None,
3434
tissue_positions_file: str | Path | None = None,
3535
scalefactors_file: str | Path | None = None,
36+
var_names_make_unique: bool = True,
3637
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
3738
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
3839
**kwargs: Any,
@@ -72,6 +73,8 @@ def visium(
7273
Path to the tissue positions file.
7374
scalefactors_file
7475
Path to the scalefactors file.
76+
var_names_make_unique
77+
If `True`, call `.var_names_make_unique()` on each `AnnData` table.
7578
imread_kwargs
7679
Keyword arguments passed to :func:`dask_image.imread.imread`.
7780
image_models_kwargs
@@ -114,7 +117,10 @@ def visium(
114117
assert counts_file is not None
115118

116119
if library_id is None and dataset_id is None:
117-
raise ValueError("Cannot determine the `library_id`. Please provide `dataset_id`.")
120+
raise ValueError(
121+
"Cannot determine the `library_id`. Please provide `dataset_id`; the `dataset_id` value will be used to "
122+
"name the elements in the `SpatialData` object."
123+
)
118124

119125
if dataset_id is not None:
120126
if dataset_id != library_id and library_id is not None:
@@ -210,6 +216,8 @@ def visium(
210216
shapes[dataset_id] = circles
211217
adata.obs["region"] = dataset_id
212218
table = TableModel.parse(adata, region=dataset_id, region_key="region", instance_key="spot_id")
219+
if var_names_make_unique:
220+
table.var_names_make_unique()
213221

214222
images = {}
215223
if fullres_image_file is not None:

src/spatialdata_io/readers/visium_hd.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def visium_hd(
4545
annotate_table_by_labels: bool = False,
4646
fullres_image_file: str | Path | None = None,
4747
load_all_images: bool = False,
48+
var_names_make_unique: bool = True,
4849
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
4950
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
5051
anndata_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -61,7 +62,8 @@ def visium_hd(
6162
path
6263
Path to directory containing the *10x Genomics* Visium HD output.
6364
dataset_id
64-
Unique identifier of the dataset. If `None`, it tries to infer it from the file name of the feature slice file.
65+
Unique identifier of the dataset, used to name the elements of the `SpatialData` object. If `None`, it tries to
66+
infer it from the file name of the feature slice file.
6567
filtered_counts_file
6668
It sets the value of `counts_file` to ``{vx.FILTERED_COUNTS_FILE!r}`` (when `True`) or to
6769
``{vx.RAW_COUNTS_FILE!r}`` (when `False`).
@@ -80,6 +82,8 @@ def visium_hd(
8082
load_all_images
8183
If `False`, load only the full resolution, high resolution and low resolution images. If `True`, also the
8284
following images: ``{vx.IMAGE_CYTASSIST!r}``.
85+
var_names_make_unique
86+
If `True`, call `.var_names_make_unique()` on each `AnnData` table.
8387
imread_kwargs
8488
Keyword arguments for :func:`imageio.imread`.
8589
image_models_kwargs
@@ -100,7 +104,8 @@ def visium_hd(
100104

101105
if dataset_id is None:
102106
dataset_id = _infer_dataset_id(path)
103-
filename_prefix = f"{dataset_id}_"
107+
108+
filename_prefix = _get_filename_prefix(path, dataset_id)
104109

105110
def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) -> None:
106111
_load_image(
@@ -265,6 +270,8 @@ def _get_bins(path_bins: Path) -> list[str]:
265270
region_key=str(VisiumHDKeys.REGION_KEY),
266271
instance_key=str(VisiumHDKeys.INSTANCE_KEY),
267272
)
273+
if var_names_make_unique:
274+
tables[bin_size_str].var_names_make_unique()
268275

269276
# read full resolution image
270277
if fullres_image_file is not None:
@@ -388,7 +395,8 @@ def _infer_dataset_id(path: Path) -> str:
388395
files = [file.name for file in path.iterdir() if file.is_file() and file.name.endswith(suffix)]
389396
if len(files) == 0 or len(files) > 1:
390397
raise ValueError(
391-
f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an argument."
398+
f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an "
399+
f"argument. The `dataset_id` value will be used to name the elements in the `SpatialData` object."
392400
)
393401
return files[0].replace(suffix, "")
394402

@@ -440,6 +448,16 @@ def _get_affine(coefficients: list[int]) -> Affine:
440448
return Affine(matrix, input_axes=("x", "y"), output_axes=("x", "y"))
441449

442450

451+
def _get_filename_prefix(path: Path, dataset_id: str) -> str:
    """Return the prefix that precedes the Visium HD file names found in ``path``.

    Parameters
    ----------
    path
        Directory that is checked for the feature slice file.
    dataset_id
        Identifier of the dataset; tried first as a ``"{dataset_id}_"`` file name prefix.

    Returns
    -------
    ``"{dataset_id}_"`` if the feature slice file prefixed with the dataset id exists in ``path``; an empty string
    if only the unprefixed feature slice file exists.

    Raises
    ------
    AssertionError
        If neither the prefixed nor the unprefixed feature slice file exists in ``path``.
    """
    feature_slice_file = VisiumHDKeys.FEATURE_SLICE_FILE.value

    if (path / f"{dataset_id}_{feature_slice_file}").exists():
        return f"{dataset_id}_"

    if not (path / feature_slice_file).exists():
        # Raised explicitly instead of via an `assert` statement so that the check is not stripped when Python runs
        # with `-O`; the exception type is kept as `AssertionError` to stay compatible with existing callers.
        raise AssertionError(
            f"Cannot locate the feature slice file, please ensure the file is present in the {path} directory and/or "
            "adjust the `dataset_id` parameter"
        )
    return ""
459+
460+
443461
def _parse_metadata(path: Path, filename_prefix: str) -> tuple[dict[str, Any], dict[str, Any]]:
444462
with h5py.File(path / f"{filename_prefix}{VisiumHDKeys.FEATURE_SLICE_FILE.value}", "r") as f5:
445463
metadata = json.loads(dict(f5.attrs)[VisiumHDKeys.METADATA_JSON])

0 commit comments

Comments
 (0)