Merge pull request #16 from HiDiHlabs/dev

niklasmueboe · web-flow · commit d21e8940dbb9 · 2025-07-16T14:20:52.000Z
API updates and multimodal support
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+*.egg-info
+build
+__pycache__
+
+# Sphinx
+generated
+jupyter_execute
+
+*.ipynb
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -18,19 +18,13 @@ repos:
     rev: v0.12.2
     hooks:
       - id: ruff
-        args: [--fix]
-  - repo: https://github.com/PyCQA/isort
-    rev: 6.0.1
-    hooks:
-      - id: isort
-  - repo: https://github.com/psf/black
-    rev: 25.1.0
-    hooks:
-      - id: black
+      - id: ruff-format
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.16.1
     hooks:
       - id: mypy
+        additional_dependencies:
+          - numpy
   - repo: https://github.com/codespell-project/codespell
     rev: v2.4.1
     hooks:
diff --git a/README.md b/README.md
@@ -1,19 +1,20 @@
 # SpatialLeiden
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Code style: Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
-[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
 [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
+[![Docs](https://app.readthedocs.org/projects/spatialleiden/badge/?version=latest)](https://spatialleiden.readthedocs.io)
+[![PyPI](https://img.shields.io/pypi/v/spatialleiden)](https://pypi.org/project/spatialleiden)
+[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/spatialleiden/README.html)
+
 
 ``SpatialLeiden`` is an implementation of
 [Multiplex Leiden clustering](https://leidenalg.readthedocs.io/en/stable/multiplex.html)
 that can be used to cluster spatially resolved omics data.
 
 ``SpatialLeiden`` integrates with the [scverse](https://scverse.org/) by leveraging
-[scanpy](https://scanpy.readthedocs.io/) and [anndata](https://anndata.readthedocs.io/)
-but can also be used independently.
+[anndata](https://anndata.readthedocs.io/) but can also be used independently.
 
 ## Installation
 
diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -10,7 +10,9 @@ Multiplex Leiden
    :nosignatures:
    :toctree: ./generated/
 
+   leiden
    spatialleiden
+   spatialleiden_multimodal
    multiplex_leiden
 
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -50,9 +50,9 @@
 intersphinx_mapping = dict(
     anndata=("https://anndata.readthedocs.io/en/stable/", None),
     matplotlib=("https://matplotlib.org/stable/", None),
+    mudata=("https://mudata.readthedocs.io/en/stable/", None),
     numpy=("https://numpy.org/doc/stable/", None),
     python=("https://docs.python.org/3", None),
-    scanpy=("https://scanpy.readthedocs.io/en/stable/", None),
     scipy=("https://docs.scipy.org/doc/scipy/", None),
     squidpy=("https://squidpy.readthedocs.io/en/stable/", None),
 )
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -6,7 +6,7 @@ SpatialLeiden is an implementation of
 that can be used to cluster spatially resolved omics data.
 
 SpatialLeiden integrates with the `scverse <https://scverse.org/>`_ by leveraging
-`scanpy <https://scanpy.readthedocs.io/>`_ and `anndata <https://anndata.readthedocs.io/>`_
+`anndata <https://anndata.readthedocs.io/>`_
 but can also be used independently.
 
 Citations
diff --git a/docs/source/usage.md b/docs/source/usage.md
@@ -2,6 +2,7 @@
 file_format: mystnb
 kernelspec:
   name: python
+  display_name: python
 jupytext:
   text_representation:
     extension: .md
@@ -29,6 +30,13 @@ import anndata as ad
 with NamedTemporaryFile(suffix=".h5ad") as h5ad_file:
     urlretrieve("https://figshare.com/ndownloader/files/40038538", h5ad_file.name)
     adata = ad.read_h5ad(h5ad_file)
+
+
+# This is not recommended! Suppressing the warnings is only done because the code is run
+# when building the docs and would clutter the webpage
+import warnings
+
+warnings.filterwarnings("ignore")
 ```
 
 First of all we are going to load the relevant packages that we will be working with as well as setting a random seed that we will use throughout this example to make the results reproducible.
@@ -38,10 +46,10 @@ import scanpy as sc
 import spatialleiden as sl
 import squidpy as sq
 
-seed = 42
+random_state = 42
 ```
 
-The data set consists of 155 genes and ~5,500 cells including their annotation for cell type as well as domains.
+The data set consists of 155 genes and ~5,500 cells including their annotation for the cell type as well as domains.
 
 +++
 
@@ -51,14 +59,36 @@ We will do some standard preprocessing by log-transforming the data and then usi
 
 ```{code-cell} ipython3
 sc.pp.log1p(adata)
-sc.pp.pca(adata, random_state=seed)
+sc.pp.pca(adata, random_state=random_state)
 
-sc.pp.neighbors(adata, random_state=seed)
+sc.pp.neighbors(adata, random_state=random_state)
 ```
 
-For SpatialLeiden we need an additional graph representing the connectivities in the topological space. Here we will use a kNN graph with 10 neighbors that we generate with {py:func}`squidpy.gr.spatial_neighbors`. Alternatives are Delaunay triangulation or regular grids in case of e.g. Visium data.
+### Building spatial neighbor graphs
+
+For SpatialLeiden we need an additional graph representing the neighbors in space, i.e.
+which cells are close/next to each other.
+
+What kind of spatial neighbor graph is suitable for the analysis is dependent on the
+technology used to generate the data. Most of the neighborhood structures interesting
+for our use cases can be calculated using {py:func}`squidpy.gr.spatial_neighbors`.
 
-We can use the calculated distances between neighboring points and transform them into connectivities using the {py:func}`spatialleiden.distance2connectivity` function.
+Generally, if the data is generated from a method with a regular lattice it is advisable
+to use this for the analysis:
+* isometric grid (hexagonal): for Visium with `squidpy.gr.spatial_neighbors(adata, coord_type="grid", n_neighs=6)`
+* square grid: for binned Stereo-seq and VisiumHD with `squidpy.gr.spatial_neighbors(adata, coord_type="grid", n_neighs=4)` (using 8 neighbors is also possible)
+
+If your data does not originate from a regular lattice, there are various options to build your neighborhood graph.
+This applies to all imaging-based methodologies that are usually analysed after segmenting cells, but also technologies with regular lattices if you use cell segmentation (such as Stereo-seq or VisiumHD).
+* kNN: calculating the *k*-nearest neighbors per cell with `squidpy.gr.spatial_neighbors(adata, coord_type="generic", n_neighs=k)`
+* Delaunay triangulation: `squidpy.gr.spatial_neighbors(adata, coord_type="generic", delaunay=True)`
+* radius-based: with a threshold of *r* units `squidpy.gr.spatial_neighbors(adata, coord_type="generic", radius=r)`
+* other methods such as Gabriel graphs, ...
+
+For the neighborhoods that are not based on regular grids we can, furthermore, scale the weight of each edge based on the distance between the two cells (that's why it is not useful for the regular grid case as the neighbors will be equidistant).
+This can be achieved by calculating connectivities based on the distances using the {py:func}`spatialleiden.distance2connectivity` function.
+
+Here, we will use a kNN graph with 10 neighbors.
 
 ```{code-cell} ipython3
 sq.gr.spatial_neighbors(adata, coord_type="generic", n_neighs=10)
@@ -68,16 +98,20 @@ adata.obsp["spatial_connectivities"] = sl.distance2connectivity(
 )
 ```
 
+### Finding clusters
+
 Now, we can already run {py:func}`spatialleiden.spatialleiden` (which we will also compare to normal Leiden clustering).
 
-The `layer_ratio` determines the weighting between the gene expression and the topological layer and is influenced by the graph structures (i.e. how many connections exist, the edge weights, etc.); the lower the value is the closer SpatialLeiden will be to normal Leiden clustering, while higher values lead to more spatially homogeneous clusters.
+The `layer_ratio` determines the weighting between the gene expression and the spatial layer and is influenced by the graph structures (i.e. how many connections exist, the edge weights, etc.); the lower the value is the closer SpatialLeiden will be to normal Leiden clustering, while higher values lead to more spatially homogeneous clusters.
 
 The resolution has the same effect as in Leiden clustering (higher resolution will lead to more clusters) and can be defined for each of the layers (but for now is left at its default value).
 
 ```{code-cell} ipython3
-sc.tl.leiden(adata, directed=False, random_state=seed)
+sc.tl.leiden(adata, directed=False, random_state=random_state)
 
-sl.spatialleiden(adata, layer_ratio=1.8, directed=(False, True), seed=seed)
+sl.spatialleiden(
+    adata, layer_ratio=1.8, directed=(False, True), random_state=random_state
+)
 
 sc.pl.embedding(adata, basis="spatial", color=["leiden", "spatialleiden"])
 ```
@@ -98,8 +132,12 @@ n_clusters = adata.obs["domain"].nunique()
 latent_resolution, spatial_resolution = sl.search_resolution(
     adata,
     n_clusters,
-    latent_kwargs={"seed": seed},
-    spatial_kwargs={"layer_ratio": 1.8, "seed": seed, "directed": (False, True)},
+    latent_kwargs={"random_state": random_state},
+    spatial_kwargs={
+        "layer_ratio": 1.8,
+        "random_state": random_state,
+        "directed": (False, True),
+    },
 )
 
 print(f"Latent resolution: {latent_resolution:.3f}")
@@ -108,6 +146,15 @@ print(f"Spatial resolution: {spatial_resolution:.3f}")
 
 In our case we can compare the resulting clusters to the annotated ground truth regions. If we are not satisfied with the results, we can go back and tweak other parameters such as the underlying neighborhood graphs or the `layer_ratio` to achieve the desired granularity of our results.
 
+```{code-cell} ipython3
+---
+tags: [hide-cell]
+---
+
+# needed for scanpy v1.11 otherwise plotting fails because the number of clusters changed
+del adata.uns["spatialleiden_colors"]
+```
+
 ```{code-cell} ipython3
 sc.pl.embedding(adata, basis="spatial", color=["spatialleiden", "Region"])
 ```
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,31 +1,24 @@
 [build-system]
-requires      = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2"]
+requires      = ["setuptools>=77.0.3", "setuptools_scm>=8"]
 build-backend = "setuptools.build_meta"
 
 
 [project]
 name            = "spatialleiden"
-description     = "Implementation of multiplex Leiden for analysis of spatial omics data."
+description     = "Implementation of multiplex Leiden for analysis of (multimodal) spatial omics data."
 readme          = { file = "README.md", content-type = "text/markdown" }
-license         = { file = "LICENSE" }
+license         = "MIT"
+license-files   = ["LICENSE"]
 requires-python = ">=3.10"
 dynamic         = ["version"]
 
 authors = [
     { name = "Niklas Müller-Bötticher", email = "niklas.mueller-boetticher@charite.de" },
     { name = "Shashwat Sahay", email = "shashwatsahay.rsg@gmail.com" },
 ]
-dependencies = [
-    "anndata",
-    "igraph",
-    "leidenalg~=0.10.2",
-    "numpy>=1.21",
-    "scanpy",
-    "scipy>=1.9",
-]
+dependencies = ["igraph", "leidenalg~=0.10.2", "numpy>=1.21", "scipy>=1.9"]
 classifiers = [
     "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: MIT License",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3 :: Only",
@@ -35,8 +28,17 @@ classifiers = [
 ]
 
 [project.optional-dependencies]
-docs = ["sphinx", "sphinx-copybutton", "sphinx-rtd-theme", "squidpy", "myst-nb"]
-dev  = ["spatialleiden[docs]", "pre-commit"]
+docs = [
+    "sphinx",
+    "sphinx-copybutton",
+    "sphinx-rtd-theme",
+    "anndata>=0.10",
+    "scanpy",
+    "squidpy",
+    "dask<2025",         # incompatibility of squidpy with dask >= 2025
+    "myst-nb",
+]
+dev = ["spatialleiden[docs]", "mudata~=0.3", "pre-commit"]
 
 [project.urls]
 Homepage      = "https://github.com/HiDiHlabs/SpatialLeiden"
@@ -53,20 +55,21 @@ include = ["spatialleiden"]
 [tool.setuptools_scm]
 
 
-[tool.isort]
-profile = "black"
-
-[tool.black]
-target-version = ["py310", "py311", "py312", "py313"]
-
 [tool.ruff]
 target-version = "py310"
 
+fix        = true
+show-fixes = true
+
+[tool.ruff.lint]
+extend-select = ["I"]
+
 [tool.mypy]
 python_version         = "3.10"
 ignore_missing_imports = true
 warn_no_return         = false
 packages               = "spatialleiden"
+plugins                = "numpy.typing.mypy_plugin"
 
 [tool.codespell]
 ignore-words-list = "coo"
diff --git a/spatialleiden/__init__.py b/spatialleiden/__init__.py
@@ -1,6 +1,11 @@
 from importlib.metadata import PackageNotFoundError, version
 
-from ._multiplex_leiden import multiplex_leiden, spatialleiden
+from ._multiplex_leiden import (
+    leiden,
+    multiplex_leiden,
+    spatialleiden,
+    spatialleiden_multimodal,
+)
 from ._resolution_search import (
     search_resolution,
     search_resolution_latent,
@@ -17,8 +22,10 @@
 
 
 __all__ = [
+    "leiden",
     "multiplex_leiden",
     "spatialleiden",
+    "spatialleiden_multimodal",
     "search_resolution",
     "search_resolution_latent",
     "search_resolution_spatial",
diff --git a/spatialleiden/_multiplex_leiden.py b/spatialleiden/_multiplex_leiden.py
diff --git a/spatialleiden/_resolution_search.py b/spatialleiden/_resolution_search.py
diff --git a/spatialleiden/_utils.py b/spatialleiden/_utils.py