Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/3831.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `jaccard` as `method` for generating connectivities in `sc.pp.neighbors()`
20 changes: 14 additions & 6 deletions src/scanpy/neighbors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def neighbors( # noqa: PLR0913
which also provides a method for estimating connectivities of data points -
the connectivity of the manifold (`method=='umap'`). If `method=='gauss'`,
connectivities are computed according to :cite:t:`Coifman2005`, in the adaption of
:cite:t:`Haghverdi2016`.
:cite:t:`Haghverdi2016`. If `method=='jaccard'`, connectivities are computed as
in PhenoGraph (:cite:p:`Levine2015`).

Parameters
----------
Expand All @@ -114,8 +115,9 @@ def neighbors( # noqa: PLR0913
Kernel to assign low weights to neighbors more distant than the
`n_neighbors` nearest neighbor.
method
Use 'umap' :cite:p:`McInnes2018` or 'gauss' (Gauss kernel following :cite:t:`Coifman2005`
with adaptive width :cite:t:`Haghverdi2016`) for computing connectivities.
Use 'umap' :cite:p:`McInnes2018`, 'gauss' (Gauss kernel following :cite:t:`Coifman2005`
with adaptive width :cite:t:`Haghverdi2016`), or 'jaccard' (Jaccard kernel as in
PhenoGraph, :cite:p:`Levine2015`) for computing connectivities.
transformer
Approximate kNN search implementation following the API of
:class:`~sklearn.neighbors.KNeighborsTransformer`.
Expand Down Expand Up @@ -609,6 +611,12 @@ def compute_neighbors( # noqa: PLR0912
self._connectivities = _connectivity.gauss(
self._distances, self.n_neighbors, knn=self.knn
)
elif method == "jaccard":
self._connectivities = _connectivity.jaccard(
knn_indices,
n_obs=self._adata.shape[0],
n_neighbors=self.n_neighbors,
)
elif method is not None:
msg = f"{method!r} should have been coerced in _handle_transform_args"
raise AssertionError(msg)
Expand All @@ -631,7 +639,7 @@ def _handle_transformer(
) -> tuple[_Method | None, KnnTransformerLike, bool]:
"""Return effective `method` and transformer.

`method` will be coerced to `'gauss'` or `'umap'`.
`method` will be coerced to `'gauss'`, `'umap'`, or `'jaccard'`.
`transformer` is coerced from a str or instance to an instance class.

If `transformer` is `None` and there are few data points,
Expand All @@ -650,7 +658,7 @@ def _handle_transformer(
transformer is None and (use_dense_distances or self._adata.n_obs < 4096)
)

# Coerce `method` to 'gauss' or 'umap'
# Coerce `method` to 'gauss', 'umap', or 'jaccard'
if method == "rapids":
if transformer is not None:
msg = "Can’t specify both `method = 'rapids'` and `transformer`."
Expand All @@ -664,7 +672,7 @@ def _handle_transformer(
raise ValueError(msg)

# Validate `knn`
conn_method = method if method in {"gauss", None} else "umap"
conn_method = method if method in {"gauss", "jaccard", None} else "umap"
if not knn and not (conn_method == "gauss" and transformer is None):
# “knn=False” seems to be only intended for method “gauss”
msg = f"`method = {method!r} only with `knn = True`."
Expand Down
50 changes: 50 additions & 0 deletions src/scanpy/neighbors/_connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ._common import (
_get_indices_distances_from_dense_matrix,
_get_indices_distances_from_sparse_matrix,
_get_sparse_matrix_from_indices_distances,
)

D = TypeVar("D", NDArray[np.float32], CSRBase)
Expand Down Expand Up @@ -136,3 +137,52 @@ def umap(
)

return connectivities.tocsr()


def jaccard(
knn_indices: NDArray[np.int32 | np.int64],
*,
n_obs: int,
n_neighbors: int,
) -> CSRBase:
"""Derive Jaccard connectivities between data points from kNN indices.

Re-implements the weighting method from Phenograph, :cite:p:`Levine2015`.

Parameters
----------
knn_indices
The input matrix of nearest neighbor indices for each cell.
n_obs
Number of cells in the data-set.
n_neighbors
The number of nearest neighbors to consider.

"""
# Construct unweighted kNN adjacency matrix (self excluded,
# as in PhenoGraph)
adjacency = _get_sparse_matrix_from_indices_distances(
knn_indices, np.ones_like(knn_indices), keep_self=False
)

# Compute |N(i) ∩ N(j)|
i_idx = np.repeat(np.arange(n_obs), n_neighbors - 1)
j_idx = knn_indices[:, 1:].ravel()
rows_i = adjacency[i_idx, :]
rows_j = adjacency[j_idx, :]
shared = np.asarray(rows_i.multiply(rows_j).sum(axis=1)).ravel()

# Jaccard index
jaccard = shared / (2 * (n_neighbors - 1) - shared)

# Build connectivity matrix, filtering out zeros
mask = jaccard != 0
connectivities = sparse.csr_matrix( # noqa: TID251
(jaccard[mask], (i_idx[mask], j_idx[mask])),
shape=(n_obs, n_obs),
)

# Symmetrise by averaging (as default in PhenoGraph)
connectivities = (connectivities + connectivities.T) / 2

return connectivities
2 changes: 1 addition & 1 deletion src/scanpy/neighbors/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


# These two are used with get_literal_vals elsewhere
_Method = Literal["umap", "gauss"]
_Method = Literal["umap", "gauss", "jaccard"]
_KnownTransformer = Literal["pynndescent", "sklearn", "rapids"]

# sphinx-autodoc-typehints can’t transitively import types from if TYPE_CHECKING blocks,
Expand Down
36 changes: 33 additions & 3 deletions tests/test_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,29 @@
]


# jaccard"kernel – only knn results
connectivities_jaccard = [
[0.0, 0.3333333333333333, 0.0, 0.3333333333333333],
[0.3333333333333333, 0.0, 0.16666666666666666, 0.3333333333333333],
[0.0, 0.16666666666666666, 0.0, 0.16666666666666666],
[0.3333333333333333, 0.3333333333333333, 0.16666666666666666, 0.0],
]

transitions_sym_jaccard = [
[0.0, 0.4225771273642583, 0.0, 0.4225771273642583],
[0.4225771273642583, 0.0, 0.4225771273642583, 0.2857142857142857],
[0.0, 0.4225771273642583, 0.0, 0.4225771273642583],
[0.4225771273642583, 0.2857142857142857, 0.4225771273642583, 0.0],
]

transitions_jaccard = [
[0.0, 0.5, 0.0, 0.5],
[0.35714285714285715, 0.0, 0.35714285714285715, 0.2857142857142857],
[0.0, 0.5, 0.0, 0.5],
[0.35714285714285715, 0.2857142857142857, 0.35714285714285715, 0.0],
]


def get_neighbors() -> Neighbors:
return Neighbors(AnnData(np.array(X)))

Expand All @@ -132,11 +155,11 @@ def neigh() -> Neighbors:
return get_neighbors()


@pytest.mark.parametrize("method", ["umap", "gauss"])
@pytest.mark.parametrize("method", ["umap", "gauss", "jaccard"])
def test_distances_euclidean(
mocker: MockerFixture, neigh: Neighbors, method: Literal["umap", "gauss"]
mocker: MockerFixture, neigh: Neighbors, method: Literal["umap", "gauss", "jaccard"]
):
"""Umap and gauss behave the same for distances.
"""Umap, gauss, and jaccard behave the same for distances.

They call pynndescent for large data.
"""
Expand Down Expand Up @@ -194,6 +217,13 @@ def test_distances_all(neigh: Neighbors, transformer, knn):
transitions_sym_gauss_knn,
id="gauss",
),
pytest.param(
"jaccard",
connectivities_jaccard,
transitions_jaccard,
transitions_sym_jaccard,
id="jaccard",
),
],
)
def test_connectivities_euclidean(neigh: Neighbors, method, conn, trans, trans_sym):
Expand Down
Loading