Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f76dc7b
Modularity score functions with comments
amalia-k510 Apr 25, 2025
f092469
typo fix
amalia-k510 Apr 25, 2025
7ffa1ec
Merge branch 'scverse:main' into main
amalia-k510 Apr 25, 2025
c0d0c52
Merge branch 'scverse:main' into main
amalia-k510 May 7, 2025
68652a7
modularity code updated and 6 tests written for modularity
amalia-k510 May 7, 2025
948319a
error fixing from pipelines
amalia-k510 May 7, 2025
6a64330
ruff error fix
amalia-k510 May 7, 2025
793351f
keywords variable fix
amalia-k510 May 7, 2025
92d8e26
neighbors from a precomputed distance matrix, still need to make sure…
amalia-k510 May 7, 2025
198c4fb
revert back
amalia-k510 May 7, 2025
e7fb67a
code only for the prexisting distance matrix
amalia-k510 May 7, 2025
14cb441
initial changes for the neighborhors
amalia-k510 May 8, 2025
0ce8c15
distances name switch and sparse array allowed
amalia-k510 May 12, 2025
914b87d
input fix
amalia-k510 May 12, 2025
d285203
variable input fixes
amalia-k510 May 12, 2025
50705b3
test added
amalia-k510 May 12, 2025
4730667
numpy issue fix for one line
amalia-k510 May 12, 2025
040b8b7
unify metadata assembly
flying-sheep May 16, 2025
c03b863
comments fix and release notes
amalia-k510 May 23, 2025
473a437
comments fix typo
amalia-k510 May 23, 2025
ec586df
Merge branch 'scverse:main' into matrix_exist
amalia-k510 May 25, 2025
43dcfc0
fix relnotes
flying-sheep May 26, 2025
8a3588c
make non-specified metric `None`
flying-sheep May 26, 2025
293f568
Merge branch 'main' into matrix_exist
flying-sheep May 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/scanpy/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def confusion_matrix(
orig, new = pd.Series(orig), pd.Series(new)
assert len(orig) == len(new)

unique_labels = pd.unique(np.concatenate((orig.values, new.values)))
unique_labels = pd.unique(
np.concatenate((np.asarray(orig.values), np.asarray(new.values)))
)

# Compute
mtx = _confusion_matrix(orig, new, labels=unique_labels)
Expand Down
111 changes: 111 additions & 0 deletions src/scanpy/neighbors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
from .._utils import NeighborsView, _doc_params, get_literal_vals
from . import _connectivity
from ._common import (
_get_indices_distances_from_dense_matrix,
_get_indices_distances_from_sparse_matrix,
_get_sparse_matrix_from_indices_distances,
)
from ._connectivity import umap
from ._doc import doc_n_pcs, doc_use_rep
from ._types import _KnownTransformer, _Method

Expand Down Expand Up @@ -74,6 +76,7 @@ def neighbors( # noqa: PLR0913
n_neighbors: int = 15,
n_pcs: int | None = None,
*,
distances: np.ndarray | SpBase | None = None,
use_rep: str | None = None,
knn: bool = True,
method: _Method = "umap",
Expand Down Expand Up @@ -186,6 +189,15 @@ def neighbors( # noqa: PLR0913
:doc:`/how-to/knn-transformers`

"""
if distances is not None:
# A precomputed distance matrix was supplied: skip PCA and the distance
# computation and build the graph directly via `neighbors_from_distance`.
return neighbors_from_distance(
adata,
distances,
n_neighbors=n_neighbors,
method=method,
)
start = logg.info("computing neighbors")
adata = adata.copy() if copy else adata
if adata.is_view: # we shouldn't need this here...
Expand Down Expand Up @@ -248,6 +260,105 @@ def neighbors( # noqa: PLR0913
return adata if copy else None


def neighbors_from_distance(
    adata: AnnData,
    distances: np.ndarray | SpBase,
    *,
    n_neighbors: int = 15,
    method: Literal["umap", "gauss"] = "umap",
    key_added: str | None = None,
) -> AnnData:
    """Compute a neighborhood graph from a precomputed distance matrix.

    PCA and the distance computation are skipped; the k nearest neighbors
    are read off `distances` and turned into connectivities with `method`.

    Parameters
    ----------
    adata
        Annotated data matrix.
    distances
        Precomputed dense or sparse distance matrix of shape
        `(n_obs, n_obs)`. The input is not modified; a copy is used.
    n_neighbors
        Number of nearest neighbors to use in the graph.
    method
        Method used to compute connectivities from the distances,
        either `'umap'` or `'gauss'`.
    key_added
        If not specified, the metadata is stored in `.uns['neighbors']`,
        distances in `.obsp['distances']` and connectivities in
        `.obsp['connectivities']`.
        If specified, the metadata is stored in `.uns[key_added]`,
        distances in `.obsp[key_added + '_distances']` and connectivities
        in `.obsp[key_added + '_connectivities']`.

    Returns
    -------
    adata
        The same `adata` object, updated in place with the distances,
        connectivities and the metadata describing them.

    Raises
    ------
    ValueError
        If `distances` is not of shape `(n_obs, n_obs)`.
    NotImplementedError
        If `method` is neither `'umap'` nor `'gauss'`.
    """
    is_sparse = isinstance(distances, SpBase)
    if is_sparse:
        # `copy=True` so that zeroing the diagonal below never touches the
        # caller's matrix when it already is in CSR format.
        distances = sparse.csr_matrix(distances, copy=True)  # noqa: TID251
    else:
        # `np.array` copies, so `fill_diagonal` does not mutate the input.
        distances = np.array(distances)

    expected_shape = (adata.n_obs, adata.n_obs)
    if distances.shape != expected_shape:
        msg = (
            f"`distances` must have shape {expected_shape}, "
            f"but has shape {distances.shape}."
        )
        raise ValueError(msg)

    if is_sparse:
        # The distance of an observation to itself must not take part in the
        # graph; dropping explicit zeros afterwards keeps the matrix compact.
        distances.setdiag(0)
        distances.eliminate_zeros()
        knn_indices, knn_distances = _get_indices_distances_from_sparse_matrix(
            distances, n_neighbors
        )
    else:
        np.fill_diagonal(distances, 0)
        knn_indices, knn_distances = _get_indices_distances_from_dense_matrix(
            distances, n_neighbors
        )

    if method == "umap":
        connectivities = umap(
            knn_indices,
            knn_distances,
            n_obs=adata.n_obs,
            n_neighbors=n_neighbors,
        )
    elif method == "gauss":
        connectivities = _connectivity.gauss(
            sparse.csr_matrix(distances),  # noqa: TID251
            n_neighbors,
            knn=True,
        )
    else:
        msg = f"Method {method} not implemented."
        raise NotImplementedError(msg)

    # Where the graph and its metadata are stored.
    key = "neighbors" if key_added is None else key_added
    dists_key = "distances" if key_added is None else key_added + "_distances"
    conns_key = "connectivities" if key_added is None else key_added + "_connectivities"

    # Both matrices are pairwise annotations of observations and belong in
    # `.obsp`; `.uns` only carries the metadata pointing at them.
    adata.obsp[dists_key] = sparse.csr_matrix(distances)  # noqa: TID251
    adata.obsp[conns_key] = connectivities
    # The metadata must name the keys actually used above, otherwise a custom
    # `key_added` would point consumers at the wrong `.obsp` entries.
    adata.uns[key] = {
        "connectivities_key": conns_key,
        "distances_key": dists_key,
        "params": {
            "n_neighbors": n_neighbors,
            "method": method,
            "random_state": 0,
            # NOTE(review): the metric of a precomputed matrix is unknown
            # here; "euclidean" is kept from the original — confirm.
            "metric": "euclidean",
        },
    }
    return adata


class FlatTree(NamedTuple): # noqa: D101
hyperplanes: None
offsets: None
Expand Down
Loading