Merge pull request #354 from gustaveroussy/no_owerwrite_prior

quentinblampey · web-flow · commit 182f3e1a34ae · 2025-11-14T14:34:09.000+01:00
Do not overwrite the prior shape key in the transcript dataframe
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@
 ### Added
 - Setting `sopa.settings.simplification_tolerance` to change the default shapely tolerance. For instance, set it to `0.1` for low simplification, or `0` for no simplification (#340)
 - Add an argument to load cells_boundaries and cells_table in `sopa.io.merscope` (`False` by default) (#346)
+- Do not overwrite the prior shape key in the transcript dataframe (useful when running `make_transcript_patches` multiple times) (#354)
 
 ## [2.1.8] - 2025-10-04
 
diff --git a/sopa/_constants.py b/sopa/_constants.py
@@ -15,6 +15,7 @@ class SopaKeys:
     PATCHES_ILOCS = "ilocs"
     ROI = "region_of_interest"
     PRIOR_SHAPES_KEY = "prior_shapes_key"
+    SOPA_PRIOR = "sopa_prior"
     POINTS_KEY = "points_key"
 
     # Other SpatialData keys
diff --git a/sopa/patches/_transcripts.py b/sopa/patches/_transcripts.py
@@ -49,8 +49,10 @@ def __init__(
 
         self.prior_shapes_key = prior_shapes_key
         self.unassigned_value = unassigned_value
+
         self.min_points_per_patch = min_points_per_patch
         self.min_cells_per_patch = min_cells_per_patch
+
         self.csv_name = csv_name
         self.centroids_csv_name = centroids_csv_name
         self.write_cells_centroids = write_cells_centroids
@@ -83,15 +85,16 @@ def assign_prior_segmentation(self) -> None:
                 "Unassigned value is not needed when using a prior segmentation based on existing shapes"
             )
 
-            return assign_transcript_to_cell(
-                self.sdata, self.points_key, self.prior_shapes_key, self.prior_shapes_key, unassigned_value=0
+            assign_transcript_to_cell(
+                self.sdata, self.points_key, self.prior_shapes_key, SopaKeys.SOPA_PRIOR, unassigned_value=0
             )
+            return
 
         assert self.prior_shapes_key in self.points.columns, (
             f"Prior-segmentation column {self.prior_shapes_key} not found in sdata['{self.points_key}']"
         )
 
-        self.points[self.prior_shapes_key] = _unassigned_to_zero(
+        self.points[SopaKeys.SOPA_PRIOR] = _unassigned_to_zero(
             self.points[self.prior_shapes_key], self.unassigned_value
         )
 
@@ -109,7 +112,7 @@ def get_prior_centroids(self) -> gpd.GeoDataFrame:
                 "x": centroids.geometry.x,
                 "y": centroids.geometry.y,
                 "z": 0,
-                self.prior_shapes_key: range(1, len(centroids) + 1),
+                SopaKeys.SOPA_PRIOR: range(1, len(centroids) + 1),
             },
             geometry=centroids,
         )
diff --git a/sopa/segmentation/_transcripts.py b/sopa/segmentation/_transcripts.py
@@ -196,6 +196,6 @@ def _check_transcript_patches(sdata: SpatialData, with_prior: bool = False):
 
     if with_prior:
         assert SopaKeys.PRIOR_SHAPES_KEY in sdata[SopaKeys.TRANSCRIPTS_PATCHES].columns, (
-            "You need to create the transcript patches with a `prior_shapes_key`. "
-            "For that, you can run cellpose first, and then run again `sopa.make_transcript_patches` with `prior_shapes_key='cellpose_boundaries'`"
+            "You need to run `sopa.make_transcript_patches` with a `prior_shapes_key`. "
+            "You can provide `prior_shapes_key='auto'` if your technology has a prior segmentation, or `prior_shapes_key='cellpose_boundaries'` if you ran cellpose segmentation first."
         )
diff --git a/sopa/segmentation/methods/_baysor.py b/sopa/segmentation/methods/_baysor.py
@@ -49,9 +49,9 @@ def baysor(
     """
     _check_transcript_patches(sdata)
 
-    prior_shapes_key = None
-    if SopaKeys.PRIOR_SHAPES_KEY in sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES]:
-        prior_shapes_key = sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.PRIOR_SHAPES_KEY].iloc[0]
+    prior_shapes_key = (
+        SopaKeys.SOPA_PRIOR if SopaKeys.PRIOR_SHAPES_KEY in sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES] else None
+    )
 
     if config is None or not len(config):
         config = _get_default_config(sdata, prior_shapes_key, scale)
diff --git a/sopa/segmentation/methods/_comseg.py b/sopa/segmentation/methods/_comseg.py
@@ -58,7 +58,7 @@ def comseg(
 
     assert "gene_column" in config, "'gene_column' not found in config"
 
-    config["prior_name"] = sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.PRIOR_SHAPES_KEY].iloc[0]
+    config["prior_name"] = SopaKeys.SOPA_PRIOR
 
     if patch_index is not None:
         patch_dir = get_transcripts_patches_dirs(sdata, patch_index)
diff --git a/sopa/segmentation/methods/_proseg.py b/sopa/segmentation/methods/_proseg.py
@@ -44,7 +44,7 @@ def proseg(
         infer_presets: Whether to infer the proseg presets based on the columns of the transcripts dataframe.
         key_added: Name of the shapes element to be added to `sdata.shapes`.
     """
-    _check_transcript_patches(sdata)
+    _check_transcript_patches(sdata, with_prior=True)
 
     points_key = sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.POINTS_KEY].iloc[0]
 
@@ -98,20 +98,14 @@ def _get_proseg_command(
 ) -> str:
     proseg_executable = _get_executable_path("proseg", ".cargo")
 
-    assert SopaKeys.PRIOR_SHAPES_KEY in sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES], (
-        "Proseg requires a prior. Re-run `sopa.make_transcript_patches` with a `prior_shapes_key`."
-    )
-
-    prior_shapes_key = sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.PRIOR_SHAPES_KEY].iloc[0]
-
     feature_key = get_feature_key(sdata[points_key], raise_error=True)
 
     use_zarr = _use_zarr_output(proseg_executable)
 
     if infer_presets:
         command_line_suffix = _add_presets(command_line_suffix, sdata[points_key].columns)
 
-    return f"{proseg_executable} transcripts.csv -x x -y y -z z --gene-column {feature_key} --cell-id-column {prior_shapes_key} --cell-id-unassigned 0 {'--exclude-spatialdata-transcripts' if use_zarr else ''} {command_line_suffix}"
+    return f"{proseg_executable} transcripts.csv -x x -y y -z z --gene-column {feature_key} --cell-id-column {SopaKeys.SOPA_PRIOR} --cell-id-unassigned 0 {'--exclude-spatialdata-transcripts' if use_zarr else ''} {command_line_suffix}"
 
 
 def _add_presets(command_line_suffix: str, columns: list[str]) -> str:

Original file line number	Diff line number	Diff line change
`@@ -196,6 +196,6 @@ def _check_transcript_patches(sdata: SpatialData, with_prior: bool = False):`
`196`	`196`
`197`	`197`	`if with_prior:`
`198`	`198`	`assert SopaKeys.PRIOR_SHAPES_KEY in sdata[SopaKeys.TRANSCRIPTS_PATCHES].columns, (`
`199`		- "You need to create the transcript patches with a `prior_shapes_key`. "
`200`		- "For that, you can run cellpose first, and then run again `sopa.make_transcript_patches` with `prior_shapes_key='cellpose_boundaries'`"
	`199`	+ "You need to run `sopa.make_transcript_patches` with a `prior_shapes_key`. "
	`200`	+ "You can provide `prior_shapes_key='auto'` if your technology has a prior segmentation, or `prior_shapes_key='cellpose_boundaries'` if you ran cellpose segmentation first."
`201`	`201`	`)`