Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
f76dc7b
Modularity score functions with comments
amalia-k510 Apr 25, 2025
f092469
typo fix
amalia-k510 Apr 25, 2025
7ffa1ec
Merge branch 'scverse:main' into main
amalia-k510 Apr 25, 2025
c0d0c52
Merge branch 'scverse:main' into main
amalia-k510 May 7, 2025
68652a7
modularity code updated and 6 tests written for modularity
amalia-k510 May 7, 2025
948319a
error fixing from pipelines
amalia-k510 May 7, 2025
6a64330
ruff error fix
amalia-k510 May 7, 2025
793351f
keywords variable fix
amalia-k510 May 7, 2025
92d8e26
neighbors from a precomputed distance matrix, still need to make sure…
amalia-k510 May 7, 2025
198c4fb
revert back
amalia-k510 May 7, 2025
e7fb67a
code only for the prexisting distance matrix
amalia-k510 May 7, 2025
14cb441
initial changes for the neighborhors
amalia-k510 May 8, 2025
0ce8c15
distances name switch and sparse array allowed
amalia-k510 May 12, 2025
914b87d
input fix
amalia-k510 May 12, 2025
d285203
variable input fixes
amalia-k510 May 12, 2025
50705b3
test added
amalia-k510 May 12, 2025
4730667
numpy issue fix for one line
amalia-k510 May 12, 2025
4b9fe3e
avoid densifying sparse matrices
amalia-k510 May 12, 2025
7d754c7
switched to @needs
amalia-k510 May 12, 2025
15320af
switched to @needs
amalia-k510 May 12, 2025
623a86c
variable fix input
amalia-k510 May 12, 2025
e8c9a25
code from separate PR removed
amalia-k510 May 12, 2025
040b8b7
unify metadata assembly
flying-sheep May 16, 2025
d6a9aee
Discard changes to src/scanpy/neighbors/__init__.py
flying-sheep May 16, 2025
c03b863
comments fix and release notes
amalia-k510 May 23, 2025
473a437
comments fix typo
amalia-k510 May 23, 2025
c6e5d1f
Merge branch 'scverse:main' into main
amalia-k510 May 25, 2025
ac0a6b3
before neighbour merge
amalia-k510 May 25, 2025
1c033f0
notes
amalia-k510 May 25, 2025
662534b
Merge branch 'main' of https://github.com/amalia-k510/scanpy into main
amalia-k510 May 25, 2025
32116f0
Merge branch 'matrix_exist' into main
amalia-k510 May 25, 2025
a1b2033
merge error fix
amalia-k510 May 25, 2025
4cdc729
post merge and call form neighbor
amalia-k510 May 25, 2025
cb7aaf6
release notes fix
amalia-k510 May 26, 2025
7e34ce2
Merge branch 'main' into main
flying-sheep May 28, 2025
efc2f89
Merge branch 'main' into pr/amalia-k510/3613
flying-sheep Dec 5, 2025
e0cf8a6
only one function
flying-sheep Dec 5, 2025
d2ac79b
Merge branch 'main' into main
flying-sheep Jan 13, 2026
16e6c7e
fix syntax
flying-sheep Jan 13, 2026
707766d
fix relnote name
flying-sheep Jan 13, 2026
389f076
fix call
flying-sheep Jan 13, 2026
76cc0f9
more test fixes
flying-sheep Jan 13, 2026
6700928
Merge branch 'main' into main
flying-sheep Jan 13, 2026
562cd52
Merge branch 'main' into pr/amalia-k510/3613
flying-sheep Jan 13, 2026
329de39
fix merge
flying-sheep Jan 13, 2026
7e78927
retrieve
flying-sheep Jan 13, 2026
e7dd7de
test and doc
flying-sheep Jan 13, 2026
dc2cd35
simplify
flying-sheep Jan 13, 2026
99a925a
modes
flying-sheep Jan 13, 2026
e92f65b
fix doctest
flying-sheep Jan 19, 2026
e8880c8
skip low-vers test
flying-sheep Jan 19, 2026
37e8fd8
fix docs
flying-sheep Jan 19, 2026
b6cf0c4
Merge branch 'main' into main
flying-sheep Jan 19, 2026
00a81f5
ci: pre-commit autoupdate (#3947)
pre-commit-ci[bot] Jan 19, 2026
fb230ef
cov
flying-sheep Jan 20, 2026
dd827ec
igraph
flying-sheep Jan 20, 2026
7cc32d9
Merge branch 'main' into main
flying-sheep Jan 20, 2026
24a9d9a
oops
flying-sheep Jan 20, 2026
407cda7
undirected only
flying-sheep Jan 22, 2026
269129b
fast path
flying-sheep Jan 22, 2026
19cecaa
whoops
flying-sheep Jan 22, 2026
1563dd1
use pandas 3 types
flying-sheep Jan 22, 2026
b00cb81
Revert "whoops"
flying-sheep Jan 22, 2026
d235e55
make sure we use new pandas
flying-sheep Jan 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Collections of useful measurements for evaluating results.
:nosignatures:
:toctree: ../generated/

metrics.modularity
metrics.confusion_matrix
metrics.gearys_c
metrics.morans_i
Expand Down
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def setup(app: Sphinx) -> None:
"scanpy.plotting._stacked_violin.StackedViolin": "scanpy.pl.StackedViolin",
"pandas.core.series.Series": "pandas.Series",
"numpy.bool_": "numpy.bool", # Since numpy 2, numpy.bool is the canonical dtype
"numpy.typing.ArrayLike": ("py:data", "numpy.typing.ArrayLike"),
}

nitpick_ignore = [
Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/3613.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add modularity scoring via {func}`scanpy.metrics.modularity` with support for directed/undirected graphs {smaller}`A. Karesh`
1 change: 1 addition & 0 deletions hatch.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dependency-groups = [ "dev" ]

[envs.docs]
dependency-groups = [ "doc" ]
extra-dependencies = [ "pandas>=3" ]
scripts.build = "sphinx-build -M html docs docs/_build -W {args}"
scripts.open = "python3 -m webbrowser -t docs/_build/html/index.html"
scripts.clean = "git clean -fdX -- {args:docs}"
Expand Down
33 changes: 19 additions & 14 deletions src/scanpy/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,20 +883,25 @@ class NeighborsView:
This defines where to look for neighbors dictionary,
connectivities, distances.

neigh = NeighborsView(adata, key)
neigh['distances']
neigh['connectivities']
neigh['params']
'connectivities' in neigh
'params' in neigh

is the same as

adata.obsp[adata.uns[key]['distances_key']]
adata.obsp[adata.uns[key]['connectivities_key']]
adata.uns[key]['params']
adata.uns[key]['connectivities_key'] in adata.obsp
'params' in adata.uns[key]
Examples
--------
>>> import scanpy as sc
>>> adata = sc.datasets.pbmc68k_reduced()
>>> key = "neighbors"

>>> neigh = NeighborsView(adata, key)
>>> d = neigh["distances"]
>>> c = neigh["connectivities"]
>>> p = neigh["params"]

is the same as doing this manually

>>> d_key = adata.uns[key].get("distances_key", "distances")
>>> c_key = adata.uns[key].get("connectivities_key", "connectivities")
>>> assert d is adata.obsp[d_key]
>>> assert c is adata.obsp[c_key]
>>> assert p is adata.uns[key]["params"]
>>> assert c_key in adata.obsp

"""

Expand Down
4 changes: 2 additions & 2 deletions src/scanpy/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from ._gearys_c import gearys_c
from ._metrics import confusion_matrix
from ._metrics import confusion_matrix, modularity
from ._morans_i import morans_i

__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
__all__ = ["confusion_matrix", "gearys_c", "modularity", "morans_i"]
131 changes: 130 additions & 1 deletion src/scanpy/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,28 @@

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, overload

import numpy as np
import pandas as pd
from anndata import AnnData
from natsort import natsorted
from pandas.api.types import CategoricalDtype

from .._utils import NeighborsView

if TYPE_CHECKING:
from collections.abc import Sequence
from typing import Literal

if TYPE_CHECKING:
from pandas.api.typing.aliases import AnyArrayLike
else: # sphinx-autodoc-typehints will execute the outer block, but end up here:
AnyArrayLike = type(
"AnyArrayLike", (), dict(__module__="pandas.api.typing.aliases")
)

from .._compat import SpBase


def confusion_matrix(
Expand Down Expand Up @@ -89,3 +102,119 @@ def confusion_matrix(
df = df.loc[np.array(orig_idx), np.array(new_idx)]

return df


@overload
def modularity(
    connectivities: AnyArrayLike | SpBase,
    /,
    labels: AnyArrayLike,
    *,
    is_directed: bool,
) -> float: ...


@overload
def modularity(
    adata: AnnData,
    /,
    labels: str | AnyArrayLike = "leiden",
    *,
    neighbors_key: str | None = None,
    mode: Literal["calculate", "update", "retrieve"] = "calculate",
) -> float: ...


def modularity(
    adata_or_connectivities: AnnData | AnyArrayLike | SpBase,
    /,
    labels: str | AnyArrayLike = "leiden",
    *,
    neighbors_key: str | None = None,
    is_directed: bool | None = None,
    mode: Literal["calculate", "update", "retrieve"] = "calculate",
) -> float:
    """Compute the modularity of a clustering on a graph.

    Parameters
    ----------
    adata_or_connectivities
        Either an `AnnData` object holding a neighbors graph,
        or a weighted adjacency matrix representing the graph directly.
    labels
        Cluster labels, one per node of the graph.
        When `AnnData` is provided, this may instead be the name of the column
        in `adata.obs` holding the labels (default: `"leiden"`).
        When a connectivities matrix is provided, this has to be an array.
    neighbors_key
        When `AnnData` is provided, the key in `adata.uns` under which the
        neighbors settings are stored; the connectivities are looked up through it.
    is_directed
        Whether the connectivities are directed or undirected.
        Required when a connectivities matrix is provided.
        Must not be `True` when `AnnData` is provided,
        as the stored connectivities are treated as undirected.
    mode
        When `AnnData` is provided, selects between
        calculating the score (`"calculate"`),
        calculating it and storing it in `adata.uns[labels]` (`"update"`),
        or returning the stored score (`"retrieve"`).

    Returns
    -------
    The modularity of the graph based on the provided clustering.
    """
    if not isinstance(adata_or_connectivities, AnnData):
        # Raw adjacency matrix: labels and directedness have to be explicit.
        if isinstance(labels, str):
            msg = "`labels` must be provided as array when passing a connectivities array"
            raise TypeError(msg)
        if is_directed is None:
            msg = "`is_directed` must be provided when passing a connectivities array"
            raise TypeError(msg)
        return modularity_array(
            adata_or_connectivities, labels=labels, is_directed=is_directed
        )

    # AnnData: only `None` or `False` are acceptable for `is_directed`.
    if is_directed:
        msg = f"Connectivities stored in `AnnData` are undirected, can’t specify `{is_directed=!r}`"
        raise ValueError(msg)
    return modularity_adata(
        adata_or_connectivities,
        labels=labels,
        neighbors_key=neighbors_key,
        mode=mode,
    )


def modularity_adata(
    adata: AnnData,
    /,
    *,
    labels: str | AnyArrayLike,
    neighbors_key: str | None,
    mode: Literal["calculate", "update", "retrieve"],
) -> float:
    """Compute, store, or look up the modularity for a clustering in `adata`.

    Parameters
    ----------
    adata
        Object providing `obs`, `uns` and the neighbors graph.
    labels
        Name of the `adata.obs` column holding the cluster labels,
        or the labels themselves.
        Must be a string for the modes `"retrieve"` and `"update"`,
        since it doubles as the key into `adata.uns`.
    neighbors_key
        Passed on to `NeighborsView` to locate the connectivities.
    mode
        `"retrieve"` returns `adata.uns[labels]["modularity"]` without
        computing anything; `"calculate"` computes the score;
        `"update"` computes it and also stores it in `adata.uns[labels]`.

    Returns
    -------
    The modularity score.

    Raises
    ------
    ValueError
        If `mode` is `"retrieve"` or `"update"` and `labels` is not a string.
    """
    if mode in {"retrieve", "update"} and not isinstance(labels, str):
        msg = "`labels` must be a string when `mode` is `'retrieve'` or `'update'`"
        raise ValueError(msg)
    if mode == "retrieve":
        return adata.uns[labels]["modularity"]

    labels_vec = adata.obs[labels] if isinstance(labels, str) else labels
    connectivities = NeighborsView(adata, neighbors_key)["connectivities"]

    # distances are treated as symmetric, so connectivities as well
    m = modularity(connectivities, labels_vec, is_directed=False)
    if mode == "update":
        # The clustering may not have left an entry in `uns`
        # (e.g. manually assigned labels), so create one instead of failing.
        if labels not in adata.uns:
            adata.uns[labels] = {}
        adata.uns[labels]["modularity"] = m
    return m


def modularity_array(
    connectivities: AnyArrayLike | SpBase, /, *, labels: AnyArrayLike, is_directed: bool
) -> float:
    """Compute the modularity of `labels` on a weighted adjacency matrix.

    Parameters
    ----------
    connectivities
        Weighted adjacency matrix, handed to `igraph.Graph.Weighted_Adjacency`.
    labels
        Cluster labels, converted to integer codes before scoring.
    is_directed
        Whether to build a directed (`True`) or undirected (`False`) graph.

    Returns
    -------
    The modularity reported by igraph for the given clustering.

    Raises
    ------
    ImportError
        If `igraph` is not installed.
    """
    # igraph is an optional dependency, so import it only when needed.
    try:
        import igraph as ig
    except ImportError as e:  # pragma: no cover
        msg = "igraph is required for computing modularity"
        raise ImportError(msg) from e
    igraph_mode = ig.ADJ_DIRECTED if is_directed else ig.ADJ_UNDIRECTED
    graph: ig.Graph = ig.Graph.Weighted_Adjacency(connectivities, mode=igraph_mode)
    return graph.modularity(_codes(labels))


def _codes(labels: AnyArrayLike) -> AnyArrayLike:
"""Convert cluster labels to integer codes as required by igraph."""
if isinstance(labels, pd.Series):
labels = labels.astype("category").array
if not isinstance(labels, pd.Categorical):
labels = pd.Categorical(labels)
return labels.codes
16 changes: 13 additions & 3 deletions src/scanpy/tools/_leiden.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -47,7 +47,7 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
flavor: Literal["leidenalg", "igraph"] | None = None,
**clustering_args,
) -> AnnData | None:
"""Cluster cells into subgroups :cite:p:`Traag2019`.
r"""Cluster cells into subgroups :cite:p:`Traag2019`.

Cluster cells using the Leiden algorithm :cite:p:`Traag2019`,
an improved version of the Louvain algorithm :cite:p:`Blondel2008`.
Expand Down Expand Up @@ -120,6 +120,12 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
A dict with the values for the parameters `resolution`, `random_state`,
and `n_iterations`.

`adata.uns['leiden' | key_added]['modularity']` : :class:`float`
The modularity score of the final clustering,
as calculated by the `flavor`.
Use :func:`scanpy.metrics.modularity`\ `(adata, mode='calculate' | 'update')`
to calculate a score independent of `flavor`.

"""
if flavor is None:
flavor = "leidenalg"
Expand Down Expand Up @@ -178,7 +184,10 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
if use_weights:
clustering_args["weights"] = np.array(g.es["weight"]).astype(np.float64)
clustering_args["seed"] = random_state
part = leidenalg.find_partition(g, partition_type, **clustering_args)
part = cast(
"MutableVertexPartition",
leidenalg.find_partition(g, partition_type, **clustering_args),
)
else:
g = _utils.get_igraph_from_adjacency(adjacency, directed=False)
if use_weights:
Expand Down Expand Up @@ -212,6 +221,7 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915
random_state=random_state,
n_iterations=n_iterations,
)
adata.uns[key_added]["modularity"] = part.modularity
logg.info(
" finished",
time=start,
Expand Down
28 changes: 22 additions & 6 deletions tests/test_clustering.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from functools import partial
from typing import TYPE_CHECKING

import pandas as pd
import pytest
Expand All @@ -10,21 +11,27 @@
from testing.scanpy._helpers.data import pbmc68k_reduced
from testing.scanpy._pytest.marks import needs

if TYPE_CHECKING:
from typing import Literal


@pytest.fixture
def adata_neighbors():
    """Provide the object returned by `pbmc68k_reduced()` to each test."""
    adata = pbmc68k_reduced()
    return adata


FLAVORS = [
pytest.param("igraph", marks=needs.igraph),
pytest.param("leidenalg", marks=needs.leidenalg),
]
@pytest.fixture(
    params=[
        pytest.param(name, marks=mark)
        for name, mark in (
            ("igraph", needs.igraph),
            ("leidenalg", needs.leidenalg),
        )
    ]
)
def flavor(request: pytest.FixtureRequest) -> Literal["igraph", "leidenalg"]:
    """Parametrize a test over the flavors, each carrying its `needs` mark."""
    return request.param


@needs.leidenalg
@needs.igraph
@pytest.mark.parametrize("flavor", FLAVORS)
@pytest.mark.parametrize("resolution", [1, 2])
@pytest.mark.parametrize("n_iterations", [-1, 3])
def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations):
Expand All @@ -44,7 +51,6 @@ def test_leiden_basic(adata_neighbors, flavor, resolution, n_iterations):

@needs.leidenalg
@needs.igraph
@pytest.mark.parametrize("flavor", FLAVORS)
def test_leiden_random_state(adata_neighbors, flavor):
is_leiden_alg = flavor == "leidenalg"
n_iterations = 2 if is_leiden_alg else -1
Expand Down Expand Up @@ -72,8 +78,18 @@ def test_leiden_random_state(adata_neighbors, flavor):
directed=is_leiden_alg,
n_iterations=n_iterations,
)
# reproducible
pd.testing.assert_series_equal(adata_1.obs["leiden"], adata_1_again.obs["leiden"])
assert (
pytest.approx(adata_1.uns["leiden"]["modularity"])
== adata_1_again.uns["leiden"]["modularity"]
)
# different clustering
assert not adata_2.obs["leiden"].equals(adata_1_again.obs["leiden"])
assert (
pytest.approx(adata_2.uns["leiden"]["modularity"])
!= adata_1_again.uns["leiden"]["modularity"]
)


@needs.igraph
Expand Down
Loading
Loading