scverse · amalia-k510 · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025 · May 7, 2025
diff --git a/src/scanpy/metrics/__init__.py b/src/scanpy/metrics/__init__.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from ._gearys_c import gearys_c
-from ._metrics import confusion_matrix
+from ._metrics import confusion_matrix, modularity, modularity_adata
 from ._morans_i import morans_i
 
-__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
+__all__ = ["confusion_matrix", "gearys_c", "modularity", "modularity_adata", "morans_i"]
diff --git a/src/scanpy/metrics/_metrics.py b/src/scanpy/metrics/_metrics.py
@@ -8,9 +8,16 @@
 import pandas as pd
 from natsort import natsorted
 from pandas.api.types import CategoricalDtype
+from scipy.sparse import coo_matrix
+
+from .._compat import SpBase
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
+    from typing import Literal
+
+    from anndata import AnnData
+    from numpy.typing import ArrayLike
 
 
 def confusion_matrix(
@@ -60,7 +67,9 @@ def confusion_matrix(
     orig, new = pd.Series(orig), pd.Series(new)
     assert len(orig) == len(new)
 
-    unique_labels = pd.unique(np.concatenate((orig.values, new.values)))
+    unique_labels = pd.unique(
+        np.concatenate((np.asarray(orig.values), np.asarray(new.values)))
+    )
 
     # Compute
     mtx = _confusion_matrix(orig, new, labels=unique_labels)
@@ -89,3 +98,88 @@ def confusion_matrix(
     df = df.loc[np.array(orig_idx), np.array(new_idx)]
 
     return df
+
+
+def modularity(
+    connectivities: ArrayLike | SpBase,
+    labels: pd.Series | ArrayLike,
+    mode: Literal["UNDIRECTED", "DIRECTED"] = "UNDIRECTED",
+) -> float:
+    """Compute the weighted modularity of a clustering on a graph.
+
+    Parameters
+    ----------
+    connectivities: array-like or sparse matrix
+        Square weighted adjacency matrix, dense (array-like) or sparse.
+    labels: array-like or pandas.Series
+        Cluster label of each node, in the row order of `connectivities`.
+    mode: str
+        Either "UNDIRECTED" (default) or "DIRECTED".
+
+    Returns
+    -------
+    float
+        Modularity of the partition; edge weights are taken into account.
+
+    Raises
+    ------
+    ImportError
+        If igraph is not installed.
+    """
+    # igraph is an optional dependency, so it is imported only on demand
+    try:
+        import igraph as ig
+    except ImportError as e:
+        msg = "igraph is required for computing modularity"
+        raise ImportError(msg) from e
+    directed = mode == "DIRECTED"
+    if isinstance(connectivities, SpBase):
+        # COO exposes the (row, col, value) triplet of every stored entry
+        coo = coo_matrix(connectivities)
+        edges = list(zip(coo.row.tolist(), coo.col.tolist(), strict=True))
+        # pass `n` so nodes without any stored edge are still vertices
+        graph = ig.Graph(n=coo.shape[0], edges=edges, directed=directed)
+        graph.es["weight"] = coo.data.tolist()
+    else:
+        adjacency = np.asarray(connectivities).tolist()
+        igraph_mode = ig.ADJ_DIRECTED if directed else ig.ADJ_UNDIRECTED
+        graph = ig.Graph.Weighted_Adjacency(adjacency, mode=igraph_mode)
+    # igraph expects integer membership codes rather than arbitrary labels
+    membership = pd.Categorical(np.asarray(labels)).codes
+    # use the edge weights; skipped when there are no edges to carry them
+    return graph.modularity(membership, weights="weight" if graph.ecount() else None)
+
+
+def modularity_adata(
+    adata: AnnData,
+    *,
+    labels: str | ArrayLike = "leiden",
+    obsp: str = "connectivities",
+    mode: Literal["UNDIRECTED", "DIRECTED"] = "UNDIRECTED",
+) -> float:
+    """Compute modularity for a graph and clustering stored in an AnnData object.
+
+    Parameters
+    ----------
+    adata: AnnData
+        Object whose `.obsp` holds the graph and whose `.obs` may hold the labels.
+    labels: str or array-like
+        A string is looked up in `adata.obs`; anything else is used as the labels.
+    obsp: str
+        Key of the adjacency matrix in `adata.obsp`.
+    mode: str
+        "UNDIRECTED" or "DIRECTED"; forwarded to `modularity`.
+
+    Returns
+    -------
+    float
+        The value returned by `modularity` for this graph and labelling.
+    """
+    if isinstance(labels, str):
+        membership = adata.obs[labels]
+    else:
+        membership = labels
+    graph = adata.obsp[obsp]
+    if isinstance(graph, pd.DataFrame):
+        graph = graph.values
+    return modularity(graph, membership, mode=mode)
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -10,9 +10,12 @@
 import pytest
 import threadpoolctl
 from scipy import sparse
+from scipy.sparse import csr_matrix  # noqa: TID251
 
 import scanpy as sc
+from scanpy.metrics import modularity, modularity_adata
 from testing.scanpy._helpers.data import pbmc68k_reduced
+from testing.scanpy._pytest.marks import needs
 from testing.scanpy._pytest.params import ARRAY_TYPES
 
 if TYPE_CHECKING:
@@ -196,3 +199,109 @@ def test_confusion_matrix_api():
     pd.testing.assert_frame_equal(
         expected, sc.metrics.confusion_matrix(data["a"], "b", data)
     )
+
+
+# Test 1: Sample graph with clear community structure (dense & sparse, directed & undirected)
+@pytest.mark.parametrize(
+    "mode", ["UNDIRECTED", "DIRECTED"], ids=["undirected", "directed"]
+)
+@pytest.mark.parametrize("use_sparse", [False, True], ids=["dense", "sparse"])
+@needs.igraph
+def test_modularity_sample_structure(mode, use_sparse):
+    """Score two disjoint 2-node communities, each node also linked to itself."""
+    mat = np.array(
+        [
+            [1, 1, 0, 0],
+            [1, 1, 0, 0],
+            [0, 0, 1, 1],
+            [0, 0, 1, 1],
+        ]
+    )
+    labels = ["A", "A", "B", "B"]
+    adj = csr_matrix(mat) if use_sparse else mat
+    score = modularity(adj, labels, mode=mode)
+
+    # Modularity should be between 0 and 1
+    assert 0 <= score <= 1
+
+
+# Test 2: Edge case when all nodes belong to the same community/cluster
+@needs.igraph
+def test_modularity_single_community():
+    """A complete graph placed in a single cluster has modularity 0."""
+    adj = np.ones((4, 4)) - np.eye(4)
+    labels = ["A", "A", "A", "A"]
+    score = modularity(adj, labels)
+
+    # expected value is 0, so only an absolute tolerance is meaningful
+    assert score == pytest.approx(0.0, abs=1e-6)
+
+
+# Test 3: Invalid input, labels length does not match adjacency matrix size
+@needs.igraph
+def test_modularity_invalid_labels():
+    """Passing fewer labels than graph nodes must raise igraph's error."""
+    from igraph._igraph import InternalError
+
+    # three labels cannot describe the four nodes of the graph
+    too_few_labels = ["A", "A", "B"]
+    four_nodes = np.eye(4)
+    mismatch = pytest.raises(InternalError, match="Membership vector size differs")
+    with mismatch:
+        modularity(four_nodes, too_few_labels)
+
+
+# Test 4: Pass both Louvain and Leiden clustering algorithms
+@pytest.mark.parametrize(
+    "cluster_method",
+    [
+        # each backend is required only by its own parametrization
+        pytest.param("louvain", marks=needs.louvain),
+        pytest.param("leiden", marks=needs.leidenalg),
+    ],
+)
+@needs.igraph
+def test_modularity_adata_multiple_clusterings(cluster_method):
+    """Cluster PBMC 3k with one method and score the resulting labels."""
+    # Loading PBMC Data and compute PCA and neighbors graph
+    adata = sc.datasets.pbmc3k()
+    sc.pp.pca(adata)
+    sc.pp.neighbors(adata)
+    # the labels are read back from obs under the clustering method's name
+    cluster = getattr(sc.tl, cluster_method)
+    cluster(adata)
+    score = modularity_adata(
+        adata, labels=cluster_method, obsp="connectivities", mode="UNDIRECTED"
+    )
+
+    # Score should be between 0 and 1
+    assert 0 <= score <= 1
+
+
+# Test 5: Modularity should be the same no matter the order of the labels
+@needs.igraph
+def test_modularity_order():
+    """Swapping the two cluster names must leave the score unchanged."""
+    two_cliques = np.array(
+        [
+            [1, 1, 0, 0],
+            [1, 1, 0, 0],
+            [0, 0, 1, 1],
+            [0, 0, 1, 1],
+        ]
+    )
+    scores = [
+        modularity(two_cliques, names) for names in (list("AABB"), list("BBAA"))
+    ]
+    assert scores[0] == scores[1]
+
+
+# Test 6: Modularity on disconnected graph like edge-case behavior in some algorithms
+@needs.igraph
+def test_modularity_disconnected_graph():
+    """An all-zero adjacency matrix is expected to yield a NaN score."""
+    edgeless = np.zeros((4, 4))
+    singleton_clusters = list("ABCD")
+    # Modularity should be undefined for disconnected graphs
+    result = modularity(edgeless, singleton_clusters)
+    assert np.isnan(result)