From 7e67cc1ed89bc749579a4235341a36a830fc5fd0 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Sun, 24 Aug 2025 18:34:01 +0200 Subject: [PATCH 01/10] fix --- src/squidpy/gr/_build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/squidpy/gr/_build.py b/src/squidpy/gr/_build.py index 2f001905a..c1c9b8ed0 100644 --- a/src/squidpy/gr/_build.py +++ b/src/squidpy/gr/_build.py @@ -457,7 +457,9 @@ def _transform_a_spectral(a: spmatrix) -> spmatrix: return a degrees = np.squeeze(np.array(np.sqrt(1.0 / a.sum(axis=0)))) - a = a.multiply(outer(a.indices, a.indptr, degrees)) + o = outer(a.indices, a.indptr, degrees) + o = csr_matrix((o, a.indices, a.indptr)) + a = a.multiply(o) a.eliminate_zeros() return a From dbdda0c514a37a9b92ce3686bafcc5054a562596 Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Fri, 29 Aug 2025 16:06:34 +0200 Subject: [PATCH 02/10] attempt to add tests and add better documentation to the functions to describe what's happening --- src/squidpy/gr/_build.py | 76 ++++++++++++++++++++++++--- tests/graph/test_spatial_neighbors.py | 23 ++++---- 2 files changed, 81 insertions(+), 18 deletions(-) diff --git a/src/squidpy/gr/_build.py b/src/squidpy/gr/_build.py index c1c9b8ed0..83e9441af 100644 --- a/src/squidpy/gr/_build.py +++ b/src/squidpy/gr/_build.py @@ -439,7 +439,26 @@ def _build_connectivity( @njit -def outer(indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA) -> NDArrayA: +def _didj_factors_csr(indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA) -> NDArrayA: + """ + Return an array F aligned with CSR nonzeros such that + F[k] = d[i] * d[j] for the k-th nonzero (i, j) in CSR order. + + Parameters + ---------- + indices : array of int + CSR `indices` (column indices). + indptr : array of int + CSR `indptr` (row pointer). + degrees : array of float, shape (n,) + Diagonal scaling vector. + + Returns + ------- + array of float + Length equals len(indices). Entry-wise factors d_i * d_j. + """ + res = np.empty_like(indices, dtype=np.float64) start = 0 for i in range(len(indptr) - 1): @@ -450,19 +469,60 @@ def outer(indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA) -> NDArrayA: return res +def csr_bilateral_diag_scale(A: spmatrix, degrees: NDArrayA) -> csr_matrix: + """ + Return D * A * D where D = diag(d) and A is CSR. + + For every stored nonzero (i, j): + (D A D)_{ij} = d[i] * A_{ij} * d[j]. + + Parameters + ---------- + A : scipy.sparse.csr_matrix + Input sparse matrix. Converted to CSR if needed. + degrees : array_like, shape (n,) + Diagonal vector. len(degrees) must equal A.shape[0]. + + Returns + ------- + scipy.sparse.csr_matrix + Matrix with same sparsity pattern as A. + """ + if A.shape[0] != len(degrees): + raise ValueError("len(d) must equal number of rows of A") + factors = _didj_factors_csr(A.indices, A.indptr, degrees) + F = csr_matrix((factors, A.indices, A.indptr), shape=A.shape) + B = A.multiply(F) + B.eliminate_zeros() + return B + + +def symmetric_normalize_csr(A: spmatrix) -> csr_matrix: + """ + Return D^{-1/2} * A * D^{-1/2}, where D = diag(degrees). + + degrees[i] = sum_j A_{ij} (row degree). + Zeros in degree are left as-is to avoid division by zero. + + Parameters + ---------- + A : scipy.sparse.csr_matrix + + Returns + ------- + scipy.sparse.csr_matrix + """ + degrees = np.squeeze(np.array(np.sqrt(1.0 / A.sum(axis=0)))) + return csr_bilateral_diag_scale(A, degrees) + + def _transform_a_spectral(a: spmatrix) -> spmatrix: if not isspmatrix_csr(a): a = a.tocsr() if not a.nnz: return a - degrees = np.squeeze(np.array(np.sqrt(1.0 / a.sum(axis=0)))) - o = outer(a.indices, a.indptr, degrees) - o = csr_matrix((o, a.indices, a.indptr)) - a = a.multiply(o) - a.eliminate_zeros() - - return a + return symmetric_normalize_csr(a) def _transform_a_cosine(a: spmatrix) -> spmatrix: diff --git a/tests/graph/test_spatial_neighbors.py b/tests/graph/test_spatial_neighbors.py index af7afeb01..d97b0c2ff 100644 --- a/tests/graph/test_spatial_neighbors.py +++ b/tests/graph/test_spatial_neighbors.py @@ -14,6 +14,9 @@ from squidpy.gr import mask_graph, spatial_neighbors from squidpy.gr._build import _build_connectivity +@pytest.fixture(params=[None, "spectral", "cosine"]) +def transform(request): + return request.param class TestSpatialNeighbors: # ground-truth Delaunay distances @@ -49,11 +52,11 @@ def _adata_concat(adata1, adata2): # TODO: add edge cases # TODO(giovp): test with reshuffling @pytest.mark.parametrize(("n_rings", "n_neigh", "sum_dist"), [(1, 6, 0), (2, 18, 30), (3, 36, 84)]) - def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int): + def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int, transform: str | None): """ check correctness of neighborhoods for visium coordinates """ - spatial_neighbors(visium_adata, n_rings=n_rings) + spatial_neighbors(visium_adata, n_rings=n_rings, transform=transform) assert visium_adata.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh assert visium_adata.uns[Key.uns.spatial_neighs()]["distances_key"] == Key.obsp.spatial_dist() if n_rings > 1: @@ -62,8 +65,8 @@ def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_n # test for library_key visium_adata2 = visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(visium_adata, visium_adata2) - spatial_neighbors(visium_adata2, n_rings=n_rings) - spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings) + spatial_neighbors(visium_adata2, n_rings=n_rings, transform=transform) + spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings, transform=transform) assert adata_concat.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), @@ -116,7 +119,7 @@ def test_set_diag(self, adata_squaregrid: AnnData, set_diag: bool, type_rings: t np.testing.assert_array_equal(G.diagonal(), float(set_diag)) np.testing.assert_array_equal(D.diagonal(), 0.0) - def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData): + def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform: str | None): """ check correctness of neighborhoods for non-visium coordinates """ @@ -138,17 +141,17 @@ def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData): ] ) - spatial_neighbors(non_visium_adata, n_neighs=3, coord_type=None) + spatial_neighbors(non_visium_adata, n_neighs=3, coord_type=None, transform=transform) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() np.testing.assert_array_equal(spatial_graph, correct_knn_graph) - spatial_neighbors(non_visium_adata, radius=5.0, coord_type=None) + spatial_neighbors(non_visium_adata, radius=5.0, coord_type=None, transform=transform) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() np.testing.assert_array_equal(spatial_graph, correct_radius_graph) - spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None) + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform=transform) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() spatial_dist = non_visium_adata.obsp[Key.obsp.spatial_dist()].toarray() @@ -158,8 +161,8 @@ def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData): # test for library_key non_visium_adata2 = non_visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(non_visium_adata, non_visium_adata2) - spatial_neighbors(adata_concat, library_key="library_id", delaunay=True, coord_type=None) - spatial_neighbors(non_visium_adata2, delaunay=True, coord_type=None) + spatial_neighbors(adata_concat, library_key="library_id", delaunay=True, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata2, delaunay=True, coord_type=None, transform=transform) np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), From a3366e09a4246a93edb44652d4d49bcec67e0d3e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 14:08:23 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/graph/test_spatial_neighbors.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/graph/test_spatial_neighbors.py b/tests/graph/test_spatial_neighbors.py index d97b0c2ff..c547352c0 100644 --- a/tests/graph/test_spatial_neighbors.py +++ b/tests/graph/test_spatial_neighbors.py @@ -14,10 +14,12 @@ from squidpy.gr import mask_graph, spatial_neighbors from squidpy.gr._build import _build_connectivity + @pytest.fixture(params=[None, "spectral", "cosine"]) def transform(request): return request.param + class TestSpatialNeighbors: # ground-truth Delaunay distances _gt_ddist = np.array( @@ -52,7 +54,9 @@ def _adata_concat(adata1, adata2): # TODO: add edge cases # TODO(giovp): test with reshuffling @pytest.mark.parametrize(("n_rings", "n_neigh", "sum_dist"), [(1, 6, 0), (2, 18, 30), (3, 36, 84)]) - def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int, transform: str | None): + def test_spatial_neighbors_visium( + self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int, transform: str | None + ): """ check correctness of neighborhoods for visium coordinates """ From b3ddcb7e46ea116a2fb0001bb556da30815e9b93 Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Fri, 29 Aug 2025 16:34:34 +0200 Subject: [PATCH 04/10] add vibe coded tests prune later --- tests/graph/test_spatial_neighbors.py | 205 ++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 10 deletions(-) diff --git a/tests/graph/test_spatial_neighbors.py b/tests/graph/test_spatial_neighbors.py index d97b0c2ff..918ec4698 100644 --- a/tests/graph/test_spatial_neighbors.py +++ b/tests/graph/test_spatial_neighbors.py @@ -14,10 +14,12 @@ from squidpy.gr import mask_graph, spatial_neighbors from squidpy.gr._build import _build_connectivity + @pytest.fixture(params=[None, "spectral", "cosine"]) def transform(request): return request.param + class TestSpatialNeighbors: # ground-truth Delaunay distances _gt_ddist = np.array( @@ -52,11 +54,11 @@ def _adata_concat(adata1, adata2): # TODO: add edge cases # TODO(giovp): test with reshuffling @pytest.mark.parametrize(("n_rings", "n_neigh", "sum_dist"), [(1, 6, 0), (2, 18, 30), (3, 36, 84)]) - def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int, transform: str | None): + def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int): """ check correctness of neighborhoods for visium coordinates """ - spatial_neighbors(visium_adata, n_rings=n_rings, transform=transform) + spatial_neighbors(visium_adata, n_rings=n_rings) assert visium_adata.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh assert visium_adata.uns[Key.uns.spatial_neighs()]["distances_key"] == Key.obsp.spatial_dist() if n_rings > 1: @@ -65,8 +67,8 @@ def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_n # test for library_key visium_adata2 = visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(visium_adata, visium_adata2) - spatial_neighbors(visium_adata2, n_rings=n_rings, transform=transform) - spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings, transform=transform) + spatial_neighbors(visium_adata2, n_rings=n_rings) + spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings) assert adata_concat.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), @@ -119,7 +121,7 @@ def test_set_diag(self, adata_squaregrid: AnnData, set_diag: bool, type_rings: t np.testing.assert_array_equal(G.diagonal(), float(set_diag)) np.testing.assert_array_equal(D.diagonal(), 0.0) - def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform: str | None): + def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData): """ check correctness of neighborhoods for non-visium coordinates """ @@ -141,17 +143,17 @@ def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform ] ) - spatial_neighbors(non_visium_adata, n_neighs=3, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata, n_neighs=3, coord_type=None) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() np.testing.assert_array_equal(spatial_graph, correct_knn_graph) - spatial_neighbors(non_visium_adata, radius=5.0, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata, radius=5.0, coord_type=None) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() np.testing.assert_array_equal(spatial_graph, correct_radius_graph) - spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() spatial_dist = non_visium_adata.obsp[Key.obsp.spatial_dist()].toarray() @@ -161,8 +163,17 @@ def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform # test for library_key non_visium_adata2 = non_visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(non_visium_adata, non_visium_adata2) - spatial_neighbors(adata_concat, library_key="library_id", delaunay=True, coord_type=None, transform=transform) - spatial_neighbors(non_visium_adata2, delaunay=True, coord_type=None, transform=transform) + spatial_neighbors( + adata_concat, + library_key="library_id", + delaunay=True, + coord_type=None, + ) + spatial_neighbors( + non_visium_adata2, + delaunay=True, + coord_type=None, + ) np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), @@ -360,3 +371,177 @@ def test_mask_graph( negative_mask=True, key_added=key_added, ) + + @pytest.mark.parametrize("coord_type", ["generic", "grid"]) + @pytest.mark.parametrize("transform", ["spectral", "cosine"]) + def test_transform_properties(self, coord_type: str, transform: str): + """Test that transforms produce expected matrix properties.""" + # Create test data + if coord_type == "generic": + rng = np.random.default_rng(42) + adata = ad.AnnData(shape=(20, 5)) + adata.obsm["spatial"] = rng.random(size=(20, 2)) * 10 + kwargs = {"n_neighs": 5, "coord_type": "generic"} + else: # grid + # Create a small visium-like dataset + adata = ad.AnnData(shape=(9, 5)) + # 3x3 grid coordinates + coords = np.array([[i, j] for i in range(3) for j in range(3)], dtype=float) + adata.obsm["spatial"] = coords + adata.uns["spatial"] = {"library1": {"scalefactors": {}}} + kwargs = {"n_neighs": 4, "coord_type": "grid"} + + # Test without transform (baseline) + conn_orig, dist_orig = spatial_neighbors(adata, copy=True, transform=None, **kwargs) + + # Test with transform + conn_trans, dist_trans = spatial_neighbors(adata, copy=True, transform=transform, **kwargs) + + # Basic assertions + assert isspmatrix_csr(conn_trans) + assert isspmatrix_csr(dist_trans) + assert conn_trans.shape == conn_orig.shape + assert dist_trans.shape == dist_orig.shape + + # Transform-specific assertions + if transform == "spectral": + # Spectral transform should normalize the matrix + # Row sums should be close to 1.0 for non-isolated nodes + row_sums = np.array(conn_trans.sum(axis=1)).flatten() + non_zero_rows = row_sums > 0 + if np.any(non_zero_rows): + # For connected components, normalized row sums should be reasonable + assert np.all(row_sums[non_zero_rows] <= 2.0) # Reasonable upper bound + assert np.all(row_sums[non_zero_rows] >= 0.1) # Reasonable lower bound + print(conn_trans.toarray()) + print(conn_trans.T.toarray()) + # Matrix should still be symmetric + assert np.allclose(conn_trans.toarray(), conn_trans.T.toarray(), rtol=1e-10) + + elif transform == "cosine": + # Cosine similarity matrix should have values between 0 and 1 + assert conn_trans.min() >= 0.0 + assert conn_trans.max() <= 1.0 + + # Diagonal should be 1.0 for non-zero rows + diag = conn_trans.diagonal() + row_sums = np.array(conn_orig.sum(axis=1)).flatten() + non_zero_rows = row_sums > 0 + if np.any(non_zero_rows): + assert np.allclose(diag[non_zero_rows], 1.0, rtol=1e-10) + + # Distance matrix should be unchanged by transforms + np.testing.assert_allclose(dist_trans.toarray(), dist_orig.toarray(), rtol=1e-12) + + @pytest.mark.parametrize("transform", ["spectral", "cosine"]) + def test_transform_parameters_stored(self, non_visium_adata: AnnData, transform: str): + """Test that transform parameters are correctly stored in uns.""" + spatial_neighbors(non_visium_adata, coord_type="generic", n_neighs=3, transform=transform) + + # Check that transform is stored in parameters + params = non_visium_adata.uns[Key.uns.spatial_neighs()]["params"] + assert "transform" in params + assert params["transform"] == transform + + def test_transform_none_equivalent_to_no_transform(self, non_visium_adata: AnnData): + """Test that transform=None produces the same result as no transform parameter.""" + adata1 = non_visium_adata.copy() + adata2 = non_visium_adata.copy() + + # One with transform=None, one without transform parameter + spatial_neighbors(adata1, coord_type="generic", n_neighs=3, transform=None) + spatial_neighbors(adata2, coord_type="generic", n_neighs=3) + + # Results should be identical + np.testing.assert_allclose( + adata1.obsp[Key.obsp.spatial_conn()].toarray(), adata2.obsp[Key.obsp.spatial_conn()].toarray() + ) + np.testing.assert_allclose( + adata1.obsp[Key.obsp.spatial_dist()].toarray(), adata2.obsp[Key.obsp.spatial_dist()].toarray() + ) + + @pytest.mark.parametrize("transform", ["spectral", "cosine"]) + def test_transform_with_copy(self, non_visium_adata: AnnData, transform: str): + """Test transform functionality with copy=True.""" + conn, dist = spatial_neighbors( + non_visium_adata, coord_type="generic", n_neighs=3, transform=transform, copy=True + ) + + # Basic checks + assert isspmatrix_csr(conn) + assert isspmatrix_csr(dist) + + # Should not modify original data + assert Key.obsp.spatial_conn() not in non_visium_adata.obsp + assert Key.obsp.spatial_dist() not in non_visium_adata.obsp + + # Transform-specific checks + if transform == "spectral": + # Should be normalized + row_sums = np.array(conn.sum(axis=1)).flatten() + non_zero_rows = row_sums > 0 + if np.any(non_zero_rows): + assert np.all(row_sums[non_zero_rows] <= 2.0) + elif transform == "cosine": + # Should have cosine similarity properties + assert conn.min() >= 0.0 + assert conn.max() <= 1.0 + + @pytest.mark.parametrize("transform", ["spectral", "cosine"]) + def test_transform_preserves_sparsity_pattern_structure(self, non_visium_adata: AnnData, transform: str): + """Test that transforms preserve basic connectivity structure when appropriate.""" + # Get original connectivity + conn_orig, _ = spatial_neighbors(non_visium_adata, coord_type="generic", n_neighs=3, transform=None, copy=True) + + # Get transformed connectivity + conn_trans, _ = spatial_neighbors( + non_visium_adata, coord_type="generic", n_neighs=3, transform=transform, copy=True + ) + + if transform == "spectral": + # Spectral transform should preserve the sparsity pattern + # (same non-zero locations) + np.testing.assert_array_equal((conn_orig > 0).toarray(), (conn_trans > 0).toarray()) + elif transform == "cosine": + # Cosine transform creates a dense similarity matrix, so sparsity pattern changes + # But it should still be a valid similarity matrix + assert conn_trans.nnz >= conn_orig.nnz # Usually becomes denser + + def test_transform_invalid_option(self, non_visium_adata: AnnData): + """Test that invalid transform options raise appropriate errors.""" + with pytest.raises((ValueError, NotImplementedError)): + spatial_neighbors(non_visium_adata, coord_type="generic", n_neighs=3, transform="invalid_transform") + + @pytest.mark.parametrize("transform", ["spectral", "cosine"]) + def test_transform_with_different_coord_types(self, transform: str): + """Test transforms work with different coordinate types.""" + # Test with generic coordinates + rng = np.random.default_rng(42) + adata_generic = ad.AnnData(shape=(16, 5)) + adata_generic.obsm["spatial"] = rng.random(size=(16, 2)) * 10 + + spatial_neighbors(adata_generic, coord_type="generic", n_neighs=4, transform=transform) + + # Test with grid coordinates + adata_grid = ad.AnnData(shape=(9, 5)) + coords = np.array([[i, j] for i in range(3) for j in range(3)], dtype=float) + adata_grid.obsm["spatial"] = coords + adata_grid.uns["spatial"] = {"library1": {"scalefactors": {}}} + + spatial_neighbors(adata_grid, coord_type="grid", n_rings=1, transform=transform) + + # Both should complete without error and have transformed matrices + for adata in [adata_generic, adata_grid]: + conn = adata.obsp[Key.obsp.spatial_conn()] + assert conn.nnz > 0 # Should have connections + + if transform == "spectral": + # Check normalization properties + row_sums = np.array(conn.sum(axis=1)).flatten() + non_zero_rows = row_sums > 0 + if np.any(non_zero_rows): + assert np.all(row_sums[non_zero_rows] <= 2.0) + elif transform == "cosine": + # Check cosine properties + assert conn.min() >= 0.0 + assert conn.max() <= 1.0 From 2fffcfdb0c3d9b8c940918fee2db456c74b42df3 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 1 Sep 2025 23:21:42 +0200 Subject: [PATCH 05/10] add tests for spectral transform --- tests/graph/test_spatial_neighbors.py | 74 +++++++++++++++++++++------ 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/tests/graph/test_spatial_neighbors.py b/tests/graph/test_spatial_neighbors.py index c547352c0..a0cc5fd2d 100644 --- a/tests/graph/test_spatial_neighbors.py +++ b/tests/graph/test_spatial_neighbors.py @@ -15,11 +15,6 @@ from squidpy.gr._build import _build_connectivity -@pytest.fixture(params=[None, "spectral", "cosine"]) -def transform(request): - return request.param - - class TestSpatialNeighbors: # ground-truth Delaunay distances _gt_ddist = np.array( @@ -55,12 +50,16 @@ def _adata_concat(adata1, adata2): # TODO(giovp): test with reshuffling @pytest.mark.parametrize(("n_rings", "n_neigh", "sum_dist"), [(1, 6, 0), (2, 18, 30), (3, 36, 84)]) def test_spatial_neighbors_visium( - self, visium_adata: AnnData, n_rings: int, n_neigh: int, sum_dist: int, transform: str | None + self, + visium_adata: AnnData, + n_rings: int, + n_neigh: int, + sum_dist: int, ): """ check correctness of neighborhoods for visium coordinates """ - spatial_neighbors(visium_adata, n_rings=n_rings, transform=transform) + spatial_neighbors(visium_adata, n_rings=n_rings) assert visium_adata.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh assert visium_adata.uns[Key.uns.spatial_neighs()]["distances_key"] == Key.obsp.spatial_dist() if n_rings > 1: @@ -69,8 +68,8 @@ def test_spatial_neighbors_visium( # test for library_key visium_adata2 = visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(visium_adata, visium_adata2) - spatial_neighbors(visium_adata2, n_rings=n_rings, transform=transform) - spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings, transform=transform) + spatial_neighbors(visium_adata2, n_rings=n_rings) + spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings) assert adata_concat.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), @@ -123,7 +122,7 @@ def test_set_diag(self, adata_squaregrid: AnnData, set_diag: bool, type_rings: t np.testing.assert_array_equal(G.diagonal(), float(set_diag)) np.testing.assert_array_equal(D.diagonal(), 0.0) - def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform: str | None): + def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData): """ check correctness of neighborhoods for non-visium coordinates """ @@ -145,17 +144,17 @@ def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform ] ) - spatial_neighbors(non_visium_adata, n_neighs=3, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata, n_neighs=3, coord_type=None) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() np.testing.assert_array_equal(spatial_graph, correct_knn_graph) - spatial_neighbors(non_visium_adata, radius=5.0, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata, radius=5.0, coord_type=None) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() np.testing.assert_array_equal(spatial_graph, correct_radius_graph) - spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform=transform) + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None) spatial_graph = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() spatial_dist = non_visium_adata.obsp[Key.obsp.spatial_dist()].toarray() @@ -165,8 +164,8 @@ def test_spatial_neighbors_non_visium(self, non_visium_adata: AnnData, transform # test for library_key non_visium_adata2 = non_visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(non_visium_adata, non_visium_adata2) - spatial_neighbors(adata_concat, library_key="library_id", delaunay=True, coord_type=None, transform=transform) - spatial_neighbors(non_visium_adata2, delaunay=True, coord_type=None, transform=transform) + spatial_neighbors(adata_concat, library_key="library_id", delaunay=True, coord_type=None) + spatial_neighbors(non_visium_adata2, delaunay=True, coord_type=None) np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), @@ -364,3 +363,48 @@ def test_mask_graph( negative_mask=True, key_added=key_added, ) + + def test_spatial_neighbors_transform_mathematical_properties(self, non_visium_adata: AnnData): + """ + Test mathematical properties of each transform. + """ + # Test spectral transform properties + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform="spectral") + adj_spectral = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() + + # Spectral transform should be symmetric + np.testing.assert_allclose(adj_spectral, adj_spectral.T, atol=1e-10) + + # Spectral transform should have normalized rows (L2 norm <= 1) + row_norms = np.sqrt(np.sum(adj_spectral**2, axis=1)) + np.testing.assert_array_less(row_norms, 1.0 + 1e-10) + + # Test cosine transform properties + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform="cosine") + adj_cosine = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() + + # Cosine transform should be symmetric + np.testing.assert_allclose(adj_cosine, adj_cosine.T, atol=1e-10) + + # Cosine transform should have values in [-1, 1] + np.testing.assert_array_less(-1.0 - 1e-10, adj_cosine) + np.testing.assert_array_less(adj_cosine, 1.0 + 1e-10) + + # Diagonal of cosine transform should be 1 (self-similarity) + np.testing.assert_allclose(np.diag(adj_cosine), 1.0, atol=1e-10) + + def test_spatial_neighbors_transform_edge_cases(self, non_visium_adata: AnnData): + """ + Test transforms with edge cases (empty graph, single node, etc.). + """ + # Test with a very small dataset + small_adata = non_visium_adata[:5].copy() # Only 5 points + + # Test all transforms with small dataset + for transform in [None, "spectral", "cosine"]: + spatial_neighbors(small_adata, delaunay=True, coord_type=None, transform=transform) + assert Key.obsp.spatial_conn() in small_adata.obsp + assert Key.obsp.spatial_dist() in small_adata.obsp + + # Verify transform parameter is saved + assert small_adata.uns[Key.uns.spatial_neighs()]["params"]["transform"] == transform From 2ce6e85223f9e4b01f5af405b379b66768a1653c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 4 Sep 2025 07:59:20 +0000 Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/graph/test_spatial_neighbors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/graph/test_spatial_neighbors.py b/tests/graph/test_spatial_neighbors.py index f07d251b5..9f0d75e72 100644 --- a/tests/graph/test_spatial_neighbors.py +++ b/tests/graph/test_spatial_neighbors.py @@ -15,8 +15,6 @@ from squidpy.gr._build import _build_connectivity - - class TestSpatialNeighbors: # ground-truth Delaunay distances _gt_ddist = np.array( From 651505a9061d921d5c93b9448073c7c45ee95278 Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Thu, 4 Sep 2025 10:02:53 +0200 Subject: [PATCH 07/10] complete the merge conflict issue --- tests/graph/test_spatial_neighbors.py | 51 ++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/tests/graph/test_spatial_neighbors.py b/tests/graph/test_spatial_neighbors.py index f07d251b5..26378c275 100644 --- a/tests/graph/test_spatial_neighbors.py +++ b/tests/graph/test_spatial_neighbors.py @@ -15,8 +15,6 @@ from squidpy.gr._build import _build_connectivity - - class TestSpatialNeighbors: # ground-truth Delaunay distances _gt_ddist = np.array( @@ -64,8 +62,8 @@ def test_spatial_neighbors_visium(self, visium_adata: AnnData, n_rings: int, n_n # test for library_key visium_adata2 = visium_adata.copy() adata_concat, batch1, batch2 = TestSpatialNeighbors._adata_concat(visium_adata, visium_adata2) - spatial_neighbors(visium_adata2, n_rings=n_rings, transform=transform) - spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings, transform=transform) + spatial_neighbors(visium_adata2, n_rings=n_rings) + spatial_neighbors(adata_concat, library_key="library_id", n_rings=n_rings) assert adata_concat.obsp[Key.obsp.spatial_conn()][0].sum() == n_neigh np.testing.assert_array_equal( adata_concat[adata_concat.obs["library_id"] == batch1].obsp[Key.obsp.spatial_conn()].toarray(), @@ -359,3 +357,48 @@ def test_mask_graph( negative_mask=True, key_added=key_added, ) + + def test_spatial_neighbors_transform_mathematical_properties(self, non_visium_adata: AnnData): + """ + Test mathematical properties of each transform. + """ + # Test spectral transform properties + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform="spectral") + adj_spectral = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() + + # Spectral transform should be symmetric + np.testing.assert_allclose(adj_spectral, adj_spectral.T, atol=1e-10) + + # Spectral transform should have normalized rows (L2 norm <= 1) + row_norms = np.sqrt(np.sum(adj_spectral**2, axis=1)) + np.testing.assert_array_less(row_norms, 1.0 + 1e-10) + + # Test cosine transform properties + spatial_neighbors(non_visium_adata, delaunay=True, coord_type=None, transform="cosine") + adj_cosine = non_visium_adata.obsp[Key.obsp.spatial_conn()].toarray() + + # Cosine transform should be symmetric + np.testing.assert_allclose(adj_cosine, adj_cosine.T, atol=1e-10) + + # Cosine transform should have values in [-1, 1] + np.testing.assert_array_less(-1.0 - 1e-10, adj_cosine) + np.testing.assert_array_less(adj_cosine, 1.0 + 1e-10) + + # Diagonal of cosine transform should be 1 (self-similarity) + np.testing.assert_allclose(np.diag(adj_cosine), 1.0, atol=1e-10) + + def test_spatial_neighbors_transform_edge_cases(self, non_visium_adata: AnnData): + """ + Test transforms with edge cases (empty graph, single node, etc.). + """ + # Test with a very small dataset + small_adata = non_visium_adata[:5].copy() # Only 5 points + + # Test all transforms with small dataset + for transform in [None, "spectral", "cosine"]: + spatial_neighbors(small_adata, delaunay=True, coord_type=None, transform=transform) + assert Key.obsp.spatial_conn() in small_adata.obsp + assert Key.obsp.spatial_dist() in small_adata.obsp + + # Verify transform parameter is saved + assert small_adata.uns[Key.uns.spatial_neighs()]["params"]["transform"] == transform From 303d5c47acc601347e80ff4e57b8215d9c89d88d Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Thu, 4 Sep 2025 11:11:12 +0200 Subject: [PATCH 08/10] add fau and clean up code --- pyproject.toml | 1 + src/squidpy/gr/_build.py | 64 +++++++++++++--------------------------- 2 files changed, 21 insertions(+), 44 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7de064f7..a55cf26df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ dependencies = [ "xarray>=2024.10.0", "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", + "fast_array_utils>=1.2.3", ] [project.optional-dependencies] diff --git a/src/squidpy/gr/_build.py b/src/squidpy/gr/_build.py index 83e9441af..b14570e70 100644 --- a/src/squidpy/gr/_build.py +++ b/src/squidpy/gr/_build.py @@ -13,7 +13,7 @@ import pandas as pd from anndata import AnnData from anndata.utils import make_index_unique -from geopandas import GeoDataFrame +from fast_array_utils import stats as fau_stats from numba import njit from scanpy import logging as logg from scipy.sparse import ( @@ -439,13 +439,18 @@ def _build_connectivity( @njit -def _didj_factors_csr(indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA) -> NDArrayA: +def _csr_bilateral_diag_scale_helper( + data: NDArrayA, indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA +) -> NDArrayA: """ - Return an array F aligned with CSR nonzeros such that - F[k] = d[i] * d[j] for the k-th nonzero (i, j) in CSR order. + Return an array F aligned with CSR non-zeros such that + F[k] = d[i] * data[k] * d[j] for the k-th non-zero (i, j) in CSR order. Parameters ---------- + + data : array of float + CSR `data` (non-zero values). indices : array of int CSR `indices` (column indices). indptr : array of int @@ -456,64 +461,35 @@ def _didj_factors_csr(indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA) -> Returns ------- array of float - Length equals len(indices). Entry-wise factors d_i * d_j. + Length equals len(data). Entry-wise factors d_i * d_j * data[k] """ - res = np.empty_like(indices, dtype=np.float64) - start = 0 + res = np.empty_like(data, dtype=np.float64) for i in range(len(indptr) - 1): ixs = indices[indptr[i] : indptr[i + 1]] - res[start : start + len(ixs)] = degrees[i] * degrees[ixs] - start += len(ixs) + res[indptr[i] : indptr[i + 1]] = degrees[i] * degrees[ixs] * data[indptr[i] : indptr[i + 1]] return res -def csr_bilateral_diag_scale(A: spmatrix, degrees: NDArrayA) -> csr_matrix: - """ - Return D * A * D where D = diag(d) and A is CSR. - - For every stored nonzero (i, j): - (D A D)_{ij} = d[i] * A_{ij} * d[j]. - - Parameters - ---------- - A : scipy.sparse.csr_matrix - Input sparse matrix. Converted to CSR if needed. - degrees : array_like, shape (n,) - Diagonal vector. len(degrees) must equal A.shape[0]. - - Returns - ------- - scipy.sparse.csr_matrix - Matrix with same sparsity pattern as A. - """ - if A.shape[0] != len(degrees): - raise ValueError("len(d) must equal number of rows of A") - factors = _didj_factors_csr(A.indices, A.indptr, degrees) - F = csr_matrix((factors, A.indices, A.indptr), shape=A.shape) - B = A.multiply(F) - B.eliminate_zeros() - return B - - -def symmetric_normalize_csr(A: spmatrix) -> csr_matrix: +def symmetric_normalize_csr(adj: spmatrix) -> csr_matrix: """ - Return D^{-1/2} * A * D^{-1/2}, where D = diag(degrees). + Return D^{-1/2} * A * D^{-1/2}, where D = diag(degrees(A)) and A = adj. - degrees[i] = sum_j A_{ij} (row degree). - Zeros in degree are left as-is to avoid division by zero. Parameters ---------- - A : scipy.sparse.csr_matrix + adj : scipy.sparse.csr_matrix Returns ------- scipy.sparse.csr_matrix """ - degrees = np.squeeze(np.array(np.sqrt(1.0 / A.sum(axis=0)))) - return csr_bilateral_diag_scale(A, degrees) + degrees = np.squeeze(np.array(np.sqrt(1.0 / fau_stats.sum(adj, axis=0)))) + if adj.shape[0] != len(degrees): + raise ValueError("len(degrees) must equal number of rows of adj") + res_data = _csr_bilateral_diag_scale_helper(adj.data, adj.indices, adj.indptr, degrees) + return csr_matrix((res_data, adj.indices, adj.indptr), shape=adj.shape) def _transform_a_spectral(a: spmatrix) -> spmatrix: From 8c881beffe5b33fe08b73e4701c40b878a2b4b08 Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Thu, 4 Sep 2025 11:14:19 +0200 Subject: [PATCH 09/10] make loops parallel --- src/squidpy/gr/_build.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/squidpy/gr/_build.py b/src/squidpy/gr/_build.py index b14570e70..d44a71ccc 100644 --- a/src/squidpy/gr/_build.py +++ b/src/squidpy/gr/_build.py @@ -14,7 +14,7 @@ from anndata import AnnData from anndata.utils import make_index_unique from fast_array_utils import stats as fau_stats -from numba import njit +from numba import njit, prange from scanpy import logging as logg from scipy.sparse import ( SparseEfficiencyWarning, @@ -438,7 +438,7 @@ def _build_connectivity( return Adj -@njit +@njit(parallel=True) def _csr_bilateral_diag_scale_helper( data: NDArrayA, indices: NDArrayA, indptr: NDArrayA, degrees: NDArrayA ) -> NDArrayA: @@ -465,7 +465,7 @@ def _csr_bilateral_diag_scale_helper( """ res = np.empty_like(data, dtype=np.float64) - for i in range(len(indptr) - 1): + for i in prange(len(indptr) - 1): ixs = indices[indptr[i] : indptr[i + 1]] res[indptr[i] : indptr[i + 1]] = degrees[i] * degrees[ixs] * data[indptr[i] : indptr[i + 1]] From 172080d0899c927048f8e6ca68434ffdfcff84ed Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Thu, 4 Sep 2025 11:17:40 +0200 Subject: [PATCH 10/10] specify fast arrayutils dep --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a55cf26df..9ef354773 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ dependencies = [ "xarray>=2024.10.0", "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", - "fast_array_utils>=1.2.3", + "fast_array_utils", ] [project.optional-dependencies]