From 132629959e3554318dcd7792a8434579b360e9b5 Mon Sep 17 00:00:00 2001 From: laudmt Date: Mon, 17 Mar 2025 11:59:32 +0100 Subject: [PATCH 1/2] improve cosmx loading --- src/spatialdata_io/readers/cosmx.py | 56 ++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 561fe914..3f6c9287 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -16,7 +16,7 @@ from dask_image.imread import imread from scipy.sparse import csr_matrix from skimage.transform import estimate_transform -from spatialdata import SpatialData +from spatialdata import SpatialData, read_zarr from spatialdata._logging import logger from spatialdata.models import Image2DModel, Labels2DModel, PointsModel, TableModel from spatialdata.transformations.transformations import Affine, Identity @@ -34,6 +34,7 @@ def cosmx( transcripts: bool = True, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), + output_path: str | Path | None = None, ) -> SpatialData: """ Read *Cosmx Nanostring* data. @@ -62,12 +63,20 @@ def cosmx( Keyword arguments passed to :func:`dask_image.imread.imread`. image_models_kwargs Keyword arguments passed to :class:`spatialdata.models.Image2DModel`. + output_path + Path where the output will be saved. If ``None``, the output will not be saved. Returns ------- :class:`spatialdata.SpatialData` """ path = Path(path) + output_path = Path(output_path) if output_path is not None else None + sdata = SpatialData() + + # If output path is provided, save the empty SpatialData object to create directories and hierarchy + if output_path is not None: + sdata.write(output_path) # tries to infer dataset_id from the name of the counts file if dataset_id is None: @@ -151,6 +160,14 @@ def cosmx( inplace=True, ) + # Add table to SpatialData object, write it and delete temporary objects to save memory + sdata.tables["table"] = table + if output_path is not None: + sdata.write_element(element_name="table") + del adata + del table + del sdata.tables + # prepare to read images and labels file_extensions = (".jpg", ".png", ".jpeg", ".tif", ".tiff") pat = re.compile(r".*_F(\d+)") @@ -195,7 +212,14 @@ def cosmx( rgb=None, **image_models_kwargs, ) - images[f"{fov}_image"] = parsed_im + image_name = f"{fov}_image" + images[image_name] = parsed_im + if output_path is not None: + sdata.images[image_name] = parsed_im + sdata.write_element(element_name=image_name) + del parsed_im + del images[image_name] + del sdata.images[image_name] else: logger.warning(f"FOV {fov} not found in counts file. Skipping image {fname}.") @@ -218,7 +242,14 @@ def cosmx( dims=("y", "x"), **image_models_kwargs, ) - labels[f"{fov}_labels"] = parsed_la + label_name = f"{fov}_labels" + labels[label_name] = parsed_la + if output_path is not None: + sdata.labels[label_name] = parsed_la + sdata.write_element(element_name=label_name) + del parsed_la + del labels[label_name] + del sdata.labels[label_name] else: logger.warning(f"FOV {fov} not found in counts file. Skipping labels {fname}.") @@ -265,7 +296,8 @@ def cosmx( # we rename z because we want to treat the data as 2d sub_table.rename(columns={"z": "z_raw"}, inplace=True) if len(sub_table) > 0: - points[f"{fov}_points"] = PointsModel.parse( + point_name = f"{fov}_points" + points[point_name] = PointsModel.parse( sub_table, coordinates={"x": CosmxKeys.X_LOCAL_TRANSCRIPT, "y": CosmxKeys.Y_LOCAL_TRANSCRIPT}, feature_key=CosmxKeys.TARGET_OF_TRANSCRIPT, @@ -276,6 +308,11 @@ def cosmx( "global_only_labels": aff, }, ) + if output_path is not None: + sdata.points[point_name] = points[point_name] + sdata.write_element(element_name=point_name) + del points[point_name] + del sdata.points[point_name] # TODO: what to do with fov file? # if fov_file is not None: @@ -286,5 +323,14 @@ def cosmx( # except KeyError: # logg.warning(f"FOV `{str(fov)}` does not exist, skipping it.") # continue - + if output_path is not None: + return read_zarr(output_path) return SpatialData(images=images, labels=labels, points=points, table=table) + +if __name__ == "__main__": + cosmx( + path="/Users/ldumont/git/cosmx_data", + dataset_id="1", + transcripts=True, + output_path="/Users/ldumont/cosmx_data_output", + ) From 1ea020e820922fcb5942a57e8cdf11fbc82a8a07 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Mar 2025 11:23:10 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 3 +-- src/spatialdata_io/readers/cosmx.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 02213221..f598404d 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,7 @@ Marconato, L., Palla, G., Yamauchi, K.A. et al. SpatialData: an open and univers [link-docs]: https://spatialdata.scverse.org/projects/io/en/latest/ [link-api]: https://spatialdata.scverse.org/projects/io/en/latest/api.html [link-cli]: https://spatialdata.scverse.org/projects/io/en/latest/cli.html - -[//]: # (numfocus-fiscal-sponsor-attribution) +[//]: # "numfocus-fiscal-sponsor-attribution" spatialdata-io is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 3f6c9287..217897f5 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -327,6 +327,7 @@ def cosmx( return read_zarr(output_path) return SpatialData(images=images, labels=labels, points=points, table=table) + if __name__ == "__main__": cosmx( path="/Users/ldumont/git/cosmx_data",