Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4ba2ff8
Add draft function to mask tissue regions
Nov 11, 2024
8b224eb
[FEATURE] store filepath when reading h5sc files
sophiamaedler Nov 3, 2025
33ff5b3
[FIX] if no axes object is provided to plot_shapes correctly read dim…
sophiamaedler Nov 3, 2025
3fc534b
add functionality to update obs to disk
sophiamaedler Nov 6, 2025
d20b5ac
add plotting support for non memory backed h5sc files
sophiamaedler Nov 6, 2025
fd66e0f
Merge branch 'main' into add_tutorial_roi_regions
sophiamaedler Nov 6, 2025
14b0bc3
rename function
sophiamaedler Nov 6, 2025
ec810f8
parametrize function
sophiamaedler Nov 6, 2025
c67cade
add print statement
sophiamaedler Nov 6, 2025
bee60c7
Merge pull request #104 from MannLabs/Add_tissue_region_masking
sophiamaedler Nov 8, 2025
60b1f52
simplify code structure and remove duplicate function
sophiamaedler Nov 8, 2025
f6d636a
[FEATURE] add functions to subset h5sc objects
sophiamaedler Nov 8, 2025
7efb1ff
ruff linting and fix pre-commit issues
sophiamaedler Nov 8, 2025
ba50312
add general synthetic test objects that closely mimic real objects
sophiamaedler Nov 8, 2025
471a8e6
[TEST] improve h5sc plotting tests
sophiamaedler Nov 8, 2025
04ca7e8
[TESTS] add tests for h5sc operations
sophiamaedler Nov 8, 2025
544e4c5
Merge branch 'main' into add_tutorial_roi_regions
sophiamaedler Nov 9, 2025
edd376c
Merge branch 'main' into add_tutorial_roi_regions
sophiamaedler Jan 4, 2026
9823f94
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 4, 2026
37bef6d
Merge branch 'main' into add_tutorial_roi_regions
sophiamaedler Jan 18, 2026
d3f488e
[FIX] incorrect warning condition
sophiamaedler Jan 18, 2026
e746ca5
[FIX] docstrings and import statements
sophiamaedler Jan 18, 2026
c3ab5b5
[FEATURE] first working version of the mask_region function
sophiamaedler Jan 18, 2026
6c35a42
[TEST] add tests for _subset module
sophiamaedler Jan 18, 2026
f95f48f
[FIX] update requirements to support new mask region function
sophiamaedler Jan 18, 2026
15cc8ae
[FIX] add missing requirement
sophiamaedler Jan 18, 2026
3b85f15
[FIX] also support numpy obsm values not just hdf5 backed
sophiamaedler Jan 18, 2026
3fa889b
Merge branch 'main' into add_tutorial_roi_regions
sophiamaedler Feb 5, 2026
8e1b423
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ anndata<0.12
spatialdata>=0.3.0,<0.6
pyarrow<22.0.0
py-lmd>=1.3.1
affine
rasterio

spatialdata-plot>=0.2.14
matplotlib
Expand Down
4 changes: 4 additions & 0 deletions src/scportrait/io/h5sc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from scportrait.pipeline._utils.constants import (
DEFAULT_CELL_ID_NAME,
DEFAULT_IDENTIFIER_FILENAME,
DEFAULT_NAME_SINGLE_CELL_IMAGES,
DEFAULT_SEGMENTATION_DTYPE,
DEFAULT_SINGLE_CELL_IMAGE_DTYPE,
Expand Down Expand Up @@ -55,6 +56,9 @@ def read_h5sc(filename: str | Path) -> AnnData:
_clean_uns(adata)

adata.obsm[DEFAULT_NAME_SINGLE_CELL_IMAGES] = f.get(IMAGE_DATACONTAINER_NAME)
adata.uns[DEFAULT_IDENTIFIER_FILENAME] = filename
adata.uns["_h5sc_file_handle"] = f

return adata


Expand Down
1 change: 1 addition & 0 deletions src/scportrait/pipeline/_utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
IMAGE_DATACONTAINER_NAME = f"obsm/{DEFAULT_NAME_SINGLE_CELL_IMAGES}"
DEFAULT_CELL_ID_NAME = "scportrait_cell_id"
INDEX_DATACONTAINER_NAME = f"obs/{DEFAULT_CELL_ID_NAME}"
DEFAULT_IDENTIFIER_FILENAME = "h5sc_source_path"

DEFAULT_IMAGE_DTYPE: np.dtype = np.uint16
DEFAULT_SEGMENTATION_DTYPE: np.dtype = np.uint64
Expand Down
1 change: 1 addition & 0 deletions src/scportrait/pipeline/_utils/spatialdata_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def calculate_centroids(mask: xarray.DataArray, coordinate_system: str = "global
transform = get_transformation(mask, coordinate_system)

if check_memory(mask):
print("Array fits in memory, using in-memory calculation.")
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Debug print statement should be removed or replaced with proper logging. For production code, use the logging module to allow users to control output verbosity.

Copilot uses AI. Check for mistakes.
centers, _, _ids = numba_mask_centroid(mask.values)
return make_centers_object(centers, _ids, transform, coordinate_system)

Expand Down
102 changes: 102 additions & 0 deletions src/scportrait/pipeline/mask_filtering/region_masking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import dask
import numpy as np
import rasterio
import spatialdata as sd
from napari_spatialdata import Interactive
from rasterio.features import geometry_mask
from scipy import ndimage
from shapely import unary_union
from shapely.geometry import Polygon, mapping
from skimage.draw import disk
from skimage.measure import find_contours
from skimage.segmentation import watershed
from spatialdata.models import Image2DModel


def mask_image(sdata, image, mask, invert, automatic_masking, threshold, overwrite, masked_image_name):
"""
Given an image and mask, either masks or crops the image.

Parameters
----------
sdata : sd.SpatialData
spatialdata object containing the image and mask.
image : str
Name of the image in sdata.images to mask.
mask : str | shapely.geometry.Polygon
Mask, either str of the name of the shape in sdata.shapes or a shapely polygon.
invert : bool
If True, inverts the mask, such that only pixels within mask remain, while the rest gets cropped.
automatic_masking : bool
If True, uses threshold + watershed to automatically create a mask based on shapes. Threshold needs to be adjusted manually.
threshold : float
Threshold for pixel intensity values at which to segment image into foreground and background.
overwrite : bool
Whether to overwrite the image in sdata.images.
masked_image_name : None | str
Name of the masked image in sdata.images if overwrite==True. Defaults to f"{image}_masked".
Returns
-------
sd.SpatialData
spatialdata object with masked image
"""
channels, height, width = sdata.images[image].data.shape

if automatic_masking:
polygon = _draw_polygons(sdata.images[image].data, threshold)
elif isinstance(mask, str):
polygon = sdata.shapes[mask].iloc[0].geometry
else:
polygon = mask

polygon_geom = [mapping(polygon)]

transform = rasterio.transform.Affine(1, 0, 0, 0, 1, 0) # identity transform

image_mask = geometry_mask(polygon_geom, invert=invert, out_shape=(height, width), transform=transform)

if channels > 1:
image_mask = dask.array.broadcast_to(image_mask, (channels, height, width))

masked_image = sdata.images[image].data * image_mask
images = {}
images["masked_image"] = Image2DModel.parse(masked_image)

if overwrite:
sdata.images[image] = images["masked_image"]
else:
if masked_image_name is None:
masked_image_name = f"{image}_masked"
sdata.images[masked_image_name] = images["masked_image"]


def _draw_polygons(image, threshold):
"""
Given an image, detect regions to turn into polygon shapes, which are then used as a mask.

Parameters
----------
image : np.ndarray
Image to find regions in.
threshold : float
Threshold for pixel intensity values at which to segment image into foreground and background.
Returns
-------
shapely.geometry.Polygon
Polygon containing the detected regions.
"""
if image.shape[0] == 1:
image = image[0]
binary_image = image > np.percentile(image.flatten(), threshold)

distance = ndimage.distance_transform_edt(binary_image)
markers, _ = ndimage.label(distance)

segmented = watershed(-distance, markers, mask=binary_image)

contours = find_contours(segmented, level=0.5)

polygons = [Polygon(contour) for contour in contours if len(contour) > 2]
polygon = unary_union(polygons)

return polygon
36 changes: 28 additions & 8 deletions src/scportrait/pipeline/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ class Project(Logable):
DEFAULT_SINGLE_CELL_IMAGE_DTYPE = DEFAULT_SINGLE_CELL_IMAGE_DTYPE
DEFAULT_CELL_ID_NAME = DEFAULT_CELL_ID_NAME

_h5sc_handle = None
_h5sc_adata = None

PALETTE = [
"blue",
"green",
Expand Down Expand Up @@ -232,6 +235,15 @@ def __exit__(self):
def __del__(self):
self._clear_temp_dir()

def __getstate__(self):
state = self.__dict__.copy()
state["_h5sc_handle"] = None # ensure closed before pickling
return state

def __setstate__(self, state):
self.__dict__.update(state)
self._h5sc_handle = None # will be reopened lazily

@property
def sdata_path(self) -> str:
return self._get_sdata_path()
Expand All @@ -243,14 +255,22 @@ def sdata(self) -> SpatialData:

@property
def h5sc(self) -> AnnData:
if self.extraction_f is None:
raise ValueError("No extraction method has been set.")
else:
if self.extraction_f.output_path is None:
path = self.extraction_f.extraction_file
else:
path = self.extraction_f.output_path
return read_h5sc(path)
# Always safely close previous handle if it exists
if hasattr(self, "_h5sc_handle") and self._h5sc_handle is not None:
try:
self._h5sc_handle.close()
except (ValueError, RuntimeError):
# handle was already closed or in invalid state
pass
self._h5sc_handle = None

# Load a fresh AnnData
adata = read_h5sc(self.extraction_f.output_path or self.extraction_f.extraction_file)
Comment on lines +267 to +268
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will fail if extraction_f is None (as checked in the old code). The validation should still be performed before attempting to read the h5sc file, otherwise this will raise an AttributeError.

Suggested change
# Load a fresh AnnData
adata = read_h5sc(self.extraction_f.output_path or self.extraction_f.extraction_file)
# Ensure extraction configuration is available before accessing its attributes
extraction_f = getattr(self, "extraction_f", None)
if extraction_f is None:
raise ValueError(
"Extraction configuration 'extraction_f' is not set. "
"Run the extraction step before accessing 'h5sc'."
)
# Load a fresh AnnData
adata = read_h5sc(extraction_f.output_path or extraction_f.extraction_file)

Copilot uses AI. Check for mistakes.

# Track only the handle, not the adata
self._h5sc_handle = adata.uns["_h5sc_file_handle"]

return adata

##### Setup Functions #####

Expand Down
30 changes: 28 additions & 2 deletions src/scportrait/plotting/h5sc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import warnings
from collections.abc import Iterable

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from anndata import AnnData
from matplotlib.axes import Axes
from matplotlib.figure import Figure
Expand Down Expand Up @@ -272,7 +274,20 @@ def cell_grid_single_channel(
fig = ax.get_figure()

spacing = spacing * single_cell_size
images = get_image_with_cellid(adata, _cell_ids, channel_id)

# Collect images in a list
if isinstance(adata.obsm["single_cell_images"], h5py.Dataset):
# if working on a memory-backed array
images = get_image_with_cellid(adata, _cell_ids, channel_id)

else:
# non backed h5sc adata objects can be accessed directly
# these are created by slicing original h5sc objects
col = "scportrait_cell_id"
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded column name 'scportrait_cell_id' is duplicated here and in lines 393-394. Consider importing and using DEFAULT_CELL_ID_NAME constant from scportrait.pipeline._utils.constants to maintain consistency.

Suggested change
col = "scportrait_cell_id"
col = DEFAULT_CELL_ID_NAME

Copilot uses AI. Check for mistakes.
mapping = pd.Series(data=np.arange(len(adata.obs), dtype=int), index=adata.obs[col].values)
idx = mapping.loc[_cell_ids].to_numpy()
images = adata.obsm["single_cell_images"][idx, channel_id, :, :]

_plot_image_grid(
ax,
images,
Expand Down Expand Up @@ -368,7 +383,18 @@ def cell_grid_multi_channel(
channel_names = adata.uns["single_cell_images"]["channel_names"]

# Collect images in a list
images = get_image_with_cellid(adata, _cell_ids)
if isinstance(adata.obsm["single_cell_images"], h5py.Dataset):
# if working on a memory-backed array
images = get_image_with_cellid(adata, _cell_ids)

else:
# non backed h5sc adata objects can be accessed directly
# these are created by slicing original h5sc objects
col = "scportrait_cell_id"
mapping = pd.Series(data=np.arange(len(adata.obs), dtype=int), index=adata.obs[col].values)
idx = mapping.loc[_cell_ids].to_numpy()
images = adata.obsm["single_cell_images"][idx]

if select_channels is not None:
if not isinstance(select_channels, Iterable):
select_channels = [select_channels]
Expand Down
7 changes: 7 additions & 0 deletions src/scportrait/plotting/sdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import spatialdata
import xarray
from geopandas.geodataframe import GeoDataFrame
from matplotlib.axes import Axes

PALETTE = [
Expand Down Expand Up @@ -48,6 +50,11 @@ def _get_shape_element(sdata, element_name) -> tuple[int, int]:
"""
if isinstance(sdata[element_name], xarray.DataTree):
shape = sdata[element_name].scale0.image.shape
elif isinstance(sdata[element_name], GeoDataFrame):
bounds = sdata[element_name].geometry.bounds
x = int(np.ceil(bounds["maxx"] - bounds["minx"]))
y = int(np.ceil(bounds["maxy"] - bounds["miny"]))
shape = (x, y)
else:
shape = sdata[element_name].data.shape

Expand Down
18 changes: 16 additions & 2 deletions src/scportrait/tools/h5sc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@
Functions to work with scPortrait's standardized single-cell data format.
"""

from .operations import get_image_index, get_image_with_cellid
from .operations import (
add_spatial_coordinates,
get_cell_id_index,
get_image_with_cellid,
subset_cells_region,
subset_h5sc,
update_obs_on_disk,
)

__all__ = ["get_image_with_cellid", "get_image_index"]
__all__ = [
"update_obs_on_disk",
"get_image_with_cellid",
"get_cell_id_index",
"add_spatial_coordinates",
"subset_h5sc",
"subset_cells_region",
]
Loading
Loading