Merge branch 'main' of https://github.com/ckmah/spatialdata-io into main

xyi · xyi · commit 273accad8094 · 2025-05-30T07:36:31.000-07:00
diff --git a/src/spatialdata_io/readers/g4x.py b/src/spatialdata_io/readers/g4x.py
@@ -2,11 +2,10 @@
 
 import re
 from pathlib import Path
-from typing import Union
 
 import dask.dataframe as dd
-import numpy as np
 import glymur
+import numpy as np
 import PIL
 from anndata.io import read_h5ad
 from dask.array.image import imread
@@ -82,25 +81,15 @@ def g4x(
         output_path = Path(output_path)
 
     # Determine if input_path is a run directory or a single sample directory
-    if any(
-        p.is_dir() and re.match(r"[A-Z][0-9]{2}", p.name) for p in input_path.iterdir()
-    ):
+    if any(p.is_dir() and re.match(r"[A-Z][0-9]{2}", p.name) for p in input_path.iterdir()):
         # Run directory with multiple samples
-        sample_input_paths = [
-            p
-            for p in input_path.iterdir()
-            if p.is_dir() and re.match(r"[A-Z][0-9]{2}", p.name)
-        ]
+        sample_input_paths = [p for p in input_path.iterdir() if p.is_dir() and re.match(r"[A-Z][0-9]{2}", p.name)]
         logger.debug(f"Found {len(sample_input_paths)} samples.")
 
         if output_path is None:
-            sample_output_paths = [
-                input_path / p.name / f"{p.name}.zarr" for p in sample_input_paths
-            ]
+            sample_output_paths = [input_path / p.name / f"{p.name}.zarr" for p in sample_input_paths]
         else:
-            sample_output_paths = [
-                output_path / f"{p.name}.zarr" for p in sample_input_paths
-            ]
+            sample_output_paths = [output_path / f"{p.name}.zarr" for p in sample_input_paths]
 
         sdatas = []
         for sample_input_path, sample_output_path in tqdm(
@@ -190,9 +179,7 @@ def g4x_sample(
         output_zarr_path = Path(output_zarr_path)
         if output_zarr_path.suffix != ".zarr":
             logger.error(f"Output path must end with '.zarr'. Got {output_zarr_path}")
-            raise ValueError(
-                f"Output path must end with '.zarr'. Got {output_zarr_path}"
-            )
+            raise ValueError(f"Output path must end with '.zarr'. Got {output_zarr_path}")
 
     if mode not in ["append", "overwrite"]:
         msg = f"Invalid mode '{mode}'. Must be one of: 'append', 'overwrite'"
@@ -431,20 +418,15 @@ def _write_segmentation(
         f"shapes/{shapes_nuclei_exp_key}",
     ]
 
-    if mode == "append" and any(
-        p in sdata.elements_paths_on_disk() for p in elements_paths
-    ):
+    if mode == "append" and any(p in sdata.elements_paths_on_disk() for p in elements_paths):
         logger.debug("Segmentation already exist. Skipping...")
         return
     elif mode == "overwrite":
         logger.debug("Deleting existing segmentation elements")
         for el in elements:
             if el in sdata:
                 del sdata[el]
-            if (
-                f"labels/{el}" in sdata.elements_paths_on_disk()
-                or f"shapes/{el}" in sdata.elements_paths_on_disk()
-            ):
+            if f"labels/{el}" in sdata.elements_paths_on_disk() or f"shapes/{el}" in sdata.elements_paths_on_disk():
                 sdata.delete_element_from_disk(el)
 
     # Load and process segmentation data
@@ -466,16 +448,12 @@ def _write_segmentation(
 
     # Nuclei shapes
     sdata[shapes_nuclei_key] = to_polygons(sdata[nuclei_key]).set_index("label")
-    sdata[shapes_nuclei_key].geometry = sdata[shapes_nuclei_key].translate(
-        xoff=offset, yoff=offset
-    )
+    sdata[shapes_nuclei_key].geometry = sdata[shapes_nuclei_key].translate(xoff=offset, yoff=offset)
     sdata[shapes_nuclei_key].index = sdata[shapes_nuclei_key].index.astype(str)
 
     # Expanded nuclei shapes
     sdata[shapes_nuclei_exp_key] = to_polygons(sdata[nuclei_exp_key]).set_index("label")
-    sdata[shapes_nuclei_exp_key].geometry = sdata[shapes_nuclei_exp_key].translate(
-        xoff=offset, yoff=offset
-    )
+    sdata[shapes_nuclei_exp_key].geometry = sdata[shapes_nuclei_exp_key].translate(xoff=offset, yoff=offset)
     sdata[shapes_nuclei_exp_key].index = sdata[shapes_nuclei_exp_key].index.astype(str)
 
     logger.debug("Writing elements")
@@ -520,9 +498,7 @@ def _write_protein_images(
     img_list.sort()
 
     if not img_list:
-        logger.warning(
-            f"No protein images found matching pattern '{pattern}' in {protein_dir}"
-        )
+        logger.warning(f"No protein images found matching pattern '{pattern}' in {protein_dir}")
         return
     logger.debug(f"Found {len(img_list)} protein images")
 
@@ -554,9 +530,7 @@ def _write_protein_images(
 
     # Create Image2DModel and write
     logger.debug("Converting to Image2DModel")
-    sdata[G4XKeys.PROTEIN_KEY.v] = Image2DModel.parse(
-        protein_stack, c_coords=channel_names, **kwargs
-    )
+    sdata[G4XKeys.PROTEIN_KEY.v] = Image2DModel.parse(protein_stack, c_coords=channel_names, **kwargs)
 
     logger.debug("Writing protein images")
     sdata.write_element(G4XKeys.PROTEIN_KEY.v)
@@ -629,9 +603,7 @@ def _write_transcripts(
         logger.debug(f"swap_xy: {swap_xy}, {type(swap_xy)}")
         if swap_xy:
             logger.debug("Swapping x and y coordinates")
-            transcripts[[coordinates["x"], coordinates["y"]]] = transcripts[
-                [coordinates["y"], coordinates["x"]]
-            ]
+            transcripts[[coordinates["x"], coordinates["y"]]] = transcripts[[coordinates["y"], coordinates["x"]]]
 
         pbar.set_description("Converting to PointsModel")
         sdata[G4XKeys.TRANSCRIPTS_KEY.v] = PointsModel.parse(
@@ -708,11 +680,7 @@ def _deep_update(base_dict, update_dict):
     Recursively update a dictionary with another dictionary.
     """
     for key, value in update_dict.items():
-        if (
-            isinstance(value, dict)
-            and key in base_dict
-            and isinstance(base_dict[key], dict)
-        ):
+        if isinstance(value, dict) and key in base_dict and isinstance(base_dict[key], dict):
             _deep_update(base_dict[key], value)
         else:
             base_dict[key] = value