Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
f76dc7b
Modularity score functions with comments
amalia-k510 Apr 25, 2025
f092469
typo fix
amalia-k510 Apr 25, 2025
7ffa1ec
Merge branch 'scverse:main' into main
amalia-k510 Apr 25, 2025
c0d0c52
Merge branch 'scverse:main' into main
amalia-k510 May 7, 2025
68652a7
modularity code updated and 6 tests written for modularity
amalia-k510 May 7, 2025
948319a
error fixing from pipelines
amalia-k510 May 7, 2025
6a64330
ruff error fix
amalia-k510 May 7, 2025
793351f
keywords variable fix
amalia-k510 May 7, 2025
92d8e26
neighbors from a precomputed distance matrix, still need to make sure…
amalia-k510 May 7, 2025
198c4fb
revert back
amalia-k510 May 7, 2025
e7fb67a
code only for the prexisting distance matrix
amalia-k510 May 7, 2025
14cb441
initial changes for the neighborhors
amalia-k510 May 8, 2025
0ce8c15
distances name switch and sparse array allowed
amalia-k510 May 12, 2025
914b87d
input fix
amalia-k510 May 12, 2025
d285203
variable input fixes
amalia-k510 May 12, 2025
50705b3
test added
amalia-k510 May 12, 2025
4730667
numpy issue fix for one line
amalia-k510 May 12, 2025
4b9fe3e
avoid densifying sparse matrices
amalia-k510 May 12, 2025
7d754c7
switched to @needs
amalia-k510 May 12, 2025
15320af
switched to @needs
amalia-k510 May 12, 2025
623a86c
variable fix input
amalia-k510 May 12, 2025
e8c9a25
code from separate PR removed
amalia-k510 May 12, 2025
040b8b7
unify metadata assembly
flying-sheep May 16, 2025
d6a9aee
Discard changes to src/scanpy/neighbors/__init__.py
flying-sheep May 16, 2025
c03b863
comments fix and release notes
amalia-k510 May 23, 2025
473a437
comments fix typo
amalia-k510 May 23, 2025
c6e5d1f
Merge branch 'scverse:main' into main
amalia-k510 May 25, 2025
ac0a6b3
before neighbour merge
amalia-k510 May 25, 2025
1c033f0
notes
amalia-k510 May 25, 2025
662534b
Merge branch 'main' of https://github.com/amalia-k510/scanpy into main
amalia-k510 May 25, 2025
32116f0
Merge branch 'matrix_exist' into main
amalia-k510 May 25, 2025
a1b2033
merge error fix
amalia-k510 May 25, 2025
4cdc729
post merge and call form neighbor
amalia-k510 May 25, 2025
cb7aaf6
release notes fix
amalia-k510 May 26, 2025
7e34ce2
Merge branch 'main' into main
flying-sheep May 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/scanpy/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from ._gearys_c import gearys_c
from ._metrics import confusion_matrix
from ._metrics import confusion_matrix, modularity, modularity_adata
from ._morans_i import morans_i

__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
__all__ = ["confusion_matrix", "gearys_c", "modularity", "modularity_adata", "morans_i"]
96 changes: 95 additions & 1 deletion src/scanpy/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,16 @@
import pandas as pd
from natsort import natsorted
from pandas.api.types import CategoricalDtype
from scipy.sparse import coo_matrix

from .._compat import SpBase

if TYPE_CHECKING:
from collections.abc import Sequence
from typing import Literal

from anndata import AnnData
from numpy.typing import ArrayLike


def confusion_matrix(
Expand Down Expand Up @@ -60,7 +67,9 @@ def confusion_matrix(
orig, new = pd.Series(orig), pd.Series(new)
assert len(orig) == len(new)

unique_labels = pd.unique(np.concatenate((orig.values, new.values)))
unique_labels = pd.unique(
np.concatenate((np.asarray(orig.values), np.asarray(new.values)))
)

# Compute
mtx = _confusion_matrix(orig, new, labels=unique_labels)
Expand Down Expand Up @@ -89,3 +98,88 @@ def confusion_matrix(
df = df.loc[np.array(orig_idx), np.array(new_idx)]

return df


def modularity(
    connectivities: ArrayLike | SpBase,
    labels: pd.Series | ArrayLike,
    mode: Literal["UNDIRECTED", "DIRECTED"] = "UNDIRECTED",
) -> float:
    """Compute the modularity of a graph given its connectivities and labels.

    Parameters
    ----------
    connectivities: array-like or sparse matrix
        Weighted adjacency matrix representing the graph. Can be a dense
        NumPy array (or anything convertible to one) or a SciPy sparse matrix.
        In ``"UNDIRECTED"`` mode the matrix is expected to be symmetric;
        for sparse input, asymmetric entries are combined by taking the
        larger of the two weights.
    labels: array-like or pandas.Series
        Cluster labels for each node in the graph, in row order of
        ``connectivities``.
    mode: str
        The mode of the graph. Can be "UNDIRECTED" or "DIRECTED". Default is "UNDIRECTED".

    Returns
    -------
    float
        The weighted modularity of the graph based on the provided clustering.

    Raises
    ------
    ImportError
        If igraph is not installed.
    ValueError
        If ``mode`` is neither "UNDIRECTED" nor "DIRECTED".
    """
    try:
        # igraph is an optional dependency, so it is imported lazily here
        # instead of at module level
        import igraph as ig
    except ImportError as e:
        msg = "igraph is required for computing modularity"
        raise ImportError(msg) from e

    # Reject unknown modes up front: otherwise the dense branch would treat
    # them as directed while the sparse branch would treat them as undirected.
    if mode not in {"UNDIRECTED", "DIRECTED"}:
        msg = f"mode must be 'UNDIRECTED' or 'DIRECTED', got {mode!r}"
        raise ValueError(msg)
    directed = mode == "DIRECTED"

    if isinstance(connectivities, SpBase):
        # COO format exposes the non-zero entries as (row, col, data) triplets
        # without densifying the matrix
        coo = coo_matrix(connectivities)
        if directed:
            rows, cols, data = coo.row, coo.col, coo.data
        else:
            # An undirected edge must be added once, not once per direction:
            # symmetrise, then keep only the upper triangle (incl. diagonal).
            csr = coo.tocsr()
            coo = coo_matrix(csr.maximum(csr.T))
            keep = coo.row <= coo.col
            rows, cols, data = coo.row[keep], coo.col[keep], coo.data[keep]
        graph = ig.Graph(
            # explicit vertex count so that isolated nodes are not dropped
            n=coo.shape[0],
            edges=list(zip(rows.tolist(), cols.tolist(), strict=True)),
            directed=directed,
        )
        graph.es["weight"] = data.tolist()
    else:
        # dense input: let igraph build the weighted graph from the matrix
        dense_array = np.asarray(connectivities)
        igraph_mode = ig.ADJ_DIRECTED if directed else ig.ADJ_UNDIRECTED
        graph = ig.Graph.Weighted_Adjacency(dense_array.tolist(), mode=igraph_mode)

    # igraph expects integer membership codes rather than arbitrary labels
    membership = pd.Categorical(np.asarray(labels)).codes

    # Use the edge weights when present; a graph without any edges may not
    # carry the attribute at all.
    weights = "weight" if "weight" in graph.edge_attributes() else None
    return graph.modularity(membership, weights=weights)


def modularity_adata(
    adata: AnnData,
    *,
    labels: str | ArrayLike = "leiden",
    obsp: str = "connectivities",
    mode: Literal["UNDIRECTED", "DIRECTED"] = "UNDIRECTED",
) -> float:
    """Compute modularity from the graph and clustering stored in an AnnData object.

    The defaults (``"leiden"`` labels, ``"connectivities"`` graph) are used
    when no keys are given.

    Parameters
    ----------
    adata: AnnData
        The AnnData object containing the data.
    labels: str or array-like
        Either a key in ``adata.obs`` holding the cluster labels,
        or the cluster labels themselves.
    obsp: str
        The key in ``adata.obsp`` that contains the connectivities.
    mode: str
        Passed through to :func:`modularity`; "UNDIRECTED" or "DIRECTED".

    Returns
    -------
    float
        The modularity of the graph based on the provided clustering.
    """
    # A string is looked up in ``adata.obs``; anything else is used as-is.
    if isinstance(labels, str):
        cluster_labels = adata.obs[labels]
    else:
        cluster_labels = labels

    graph_matrix = adata.obsp[obsp]
    # Unwrap a DataFrame into its underlying array before handing it on.
    if isinstance(graph_matrix, pd.DataFrame):
        graph_matrix = graph_matrix.values

    return modularity(graph_matrix, cluster_labels, mode=mode)
109 changes: 109 additions & 0 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@
import pytest
import threadpoolctl
from scipy import sparse
from scipy.sparse import csr_matrix # noqa: TID251

import scanpy as sc
from scanpy.metrics import modularity, modularity_adata
from testing.scanpy._helpers.data import pbmc68k_reduced
from testing.scanpy._pytest.marks import needs
from testing.scanpy._pytest.params import ARRAY_TYPES

if TYPE_CHECKING:
Expand Down Expand Up @@ -196,3 +199,109 @@ def test_confusion_matrix_api():
pd.testing.assert_frame_equal(
expected, sc.metrics.confusion_matrix(data["a"], "b", data)
)


# Test 1: Sample graph with clear community structure (dense & sparse, directed & undirected)
@pytest.mark.parametrize(
    "mode", ["UNDIRECTED", "DIRECTED"], ids=["undirected", "directed"]
)
# ids must follow the order of the values: False -> dense, True -> sparse
@pytest.mark.parametrize("use_sparse", [False, True], ids=["dense", "sparse"])
@needs.igraph
def test_modularity_sample_structure(mode, use_sparse):
    # 4 node adjacency matrix with two separate 2-node communities
    mat = np.array(
        [
            [1, 1, 0, 0],
            [1, 1, 0, 0],
            [0, 0, 1, 1],
            [0, 0, 1, 1],
        ]
    )
    labels = ["A", "A", "B", "B"]
    adj = csr_matrix(mat) if use_sparse else mat
    score = modularity(adj, labels, mode=mode)

    # Modularity should be between 0 and 1
    assert 0 <= score <= 1


# Test 2: Edge case when all nodes belong to the same community/cluster
@needs.igraph
def test_modularity_single_community():
    # fully connected graph (no self-loops) with every node in one cluster
    adj = np.ones((4, 4)) - np.eye(4)
    labels = ["A", "A", "A", "A"]
    score = modularity(adj, labels)

    # Modularity should be 0. An absolute tolerance is required here:
    # a relative tolerance around an expected value of 0 is itself 0.
    assert score == pytest.approx(0.0, abs=1e-6)


# Test 3: Invalid input, labels length does not match adjacency matrix size
@needs.igraph
def test_modularity_invalid_labels():
    from igraph._igraph import InternalError

    # 4 nodes but only 3 labels: igraph itself rejects the membership vector
    too_few_labels = ["A", "A", "B"]
    four_node_adj = np.eye(4)

    with pytest.raises(InternalError, match="Membership vector size differs"):
        modularity(four_node_adj, too_few_labels)


# Test 4: Pass both Louvain and Leiden clustering algorithms
@pytest.mark.parametrize(
    "cluster_method",
    [
        # Attach each optional dependency to its own case only, so a missing
        # louvain install does not skip the leiden case (and vice versa).
        pytest.param("louvain", marks=needs.louvain),
        pytest.param("leiden", marks=needs.leidenalg),
    ],
)
@needs.igraph
def test_modularity_adata_multiple_clusterings(cluster_method):
    # Small bundled dataset, so the test does not need network access
    adata = pbmc68k_reduced()
    sc.pp.pca(adata)
    sc.pp.neighbors(adata)

    # Run the requested clustering; by default it stores its labels in
    # ``adata.obs`` under the name of the method.
    cluster = getattr(sc.tl, cluster_method)
    cluster(adata)

    score = modularity_adata(
        adata, labels=cluster_method, obsp="connectivities", mode="UNDIRECTED"
    )
    # Score should be between 0 and 1
    assert 0 <= score <= 1


# Test 5: Modularity should be the same no matter the order of the labels
@needs.igraph
def test_modularity_order():
    # two separate 2-node communities
    adj = np.array(
        [
            [1, 1, 0, 0],
            [1, 1, 0, 0],
            [0, 0, 1, 1],
            [0, 0, 1, 1],
        ]
    )
    # same partition, cluster names swapped
    labels1 = ["A", "A", "B", "B"]
    labels2 = ["B", "B", "A", "A"]
    score_1 = modularity(adj, labels1)
    score_2 = modularity(adj, labels2)

    # Renaming clusters must not change the score. Compare with a tolerance,
    # since the two floats are computed independently.
    assert score_1 == pytest.approx(score_2)


# Test 6: Modularity of a graph without any edges (an edge case that some algorithms leave undefined)
@needs.igraph
def test_modularity_disconnected_graph():
    # all-zero adjacency matrix: four nodes, no edges, one cluster per node
    n_nodes = 4
    edgeless_adj = np.zeros((n_nodes, n_nodes))
    singleton_labels = ["A", "B", "C", "D"]

    result = modularity(edgeless_adj, singleton_labels)

    # with no edges the score is expected to be undefined, i.e. NaN
    assert np.isnan(result)
Loading