From 20239967971d0cac8f55738ce49cd5377bf0f73b Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Tue, 18 Mar 2025 17:42:20 +0100 Subject: [PATCH 01/18] perf implement rust co-occurrence statistics --- pyproject.toml | 3 + src/squidpy/gr/_ppatterns.py | 201 ++++++++++++++++++++++++++-------- tests/graph/test_ppatterns.py | 31 ++++++ 3 files changed, 189 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1143cab94..c97919602 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,9 @@ docs = [ "myst-nb>=0.17.1", "sphinx_copybutton>=0.5.0", ] +co_occurrence = [ + "ststat-rs", +] [project.urls] Homepage = "https://github.com/scverse/squidpy" diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index a61fed47e..ad72d384a 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -2,6 +2,8 @@ from __future__ import annotations +from importlib.util import find_spec + from collections.abc import Iterable, Sequence from itertools import chain from typing import TYPE_CHECKING, Any, Literal @@ -325,6 +327,73 @@ def _occur_count( return out +def _co_occurrence_origin ( + spatial: NDArrayA, + original_clust: pd.Series, + interval: NDArrayA, + copy: bool = False, + n_splits: int | None = None, + n_jobs: int | None = None, + backend: str = "loky", + show_progress_bar: bool = True +) -> NDArrayA: + + # annotate cluster idx + clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} + labs = np.array([clust_map[c] for c in original_clust], dtype=ip) + labs_unique = np.array(list(clust_map.values()), dtype=ip) + + # create intervals thresholds + if isinstance(interval, int): + thresh_min, thresh_max = _find_min_max(spatial) + interval = np.linspace(thresh_min, thresh_max, num=interval, dtype=fp) + else: + interval = np.array(sorted(interval), dtype=fp, copy=True) + if len(interval) <= 1: + raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") + + n_obs = spatial.shape[0] + if n_splits is None: + size_arr = (n_obs**2 * spatial.itemsize) / 1024 / 1024 # calc expected mem usage + n_splits = 1 + if size_arr > 2000: + while (n_obs / n_splits) > 2048: + n_splits += 1 + logg.warning( + f"`n_splits` was automatically set to `{n_splits}` to " + f"prevent `{n_obs}x{n_obs}` distance matrix from being created" + ) + n_splits = int(max(min(n_splits, n_obs), 1)) + + # split array and labels + spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) + labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) + # create idx array including unique combinations and self-comparison + x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) + idx_splits = list(zip(x, y, strict=False)) + + n_jobs = _get_n_cores(n_jobs) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals " + f"`{len(idx_splits)}` split combinations using `{n_jobs}` core(s)" + ) + + out_lst = parallelize( + _co_occurrence_helper, + collection=idx_splits, + extractor=chain.from_iterable, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + )( + spatial_splits=spatial_splits, + labs_splits=labs_splits, + labs_unique=labs_unique, + interval=interval, + ) + out = list(out_lst)[0] if len(idx_splits) == 1 else sum(list(out_lst)) / len(idx_splits) + + return out def _co_occurrence_helper( idx_splits: Iterable[tuple[int, int]], @@ -352,6 +421,55 @@ def _co_occurrence_helper( return out_lst +def _co_occurrence_rs( + 
spatial: NDArrayA, + original_clust: pd.Series, + interval: NDArrayA +) -> NDArrayA | None: + + from ststat_rs import co_occur_count + + clust = original_clust.cat.codes.astype(np.int32) + + co_occur_3d = co_occur_count( + spatial[:, 0], + spatial[:, 1], + interval, clust + ) + + ## for each dimension + # for i in range(co_occur.shape[2]): + num = co_occur_3d.shape[0] + labs_unique = range(co_occur_3d.shape[0]) + out = np.zeros((num, num, interval.shape[0] - 1), dtype=fp) + + interval_seq = np.arange(interval.shape[0] - 2, -1, -1) + interval_seq.shape[0] + + + for i_interval in interval_seq: + # print(i_interval) + co_occur = co_occur_3d[:, :, i_interval] + + probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=fp) + probs = np.sum(probs_matrix, axis=0) + probs_con = np.zeros((num, num), dtype=fp) + + for c in labs_unique: + probs_conditional = ( + co_occur[c] / np.sum(co_occur[c]) if np.sum(co_occur[c]) != 0 else np.zeros(num, dtype=fp) + ) + probs_con[c, :] = np.zeros(num, dtype=fp) + for i in range(num): + if probs[i] == 0: + probs_con[c, i] = 0 + else: + probs_con[c, i] = probs_conditional[i] / probs[i] + + # print(interval_seq.shape[0] - 1 - i_interval) + out[:, :, interval_seq.shape[0] - 1 - i_interval] = probs_con + + return out @d.dedent def co_occurrence( @@ -364,6 +482,7 @@ def co_occurrence( n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, + use_rust: bool = False ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. @@ -381,6 +500,9 @@ def co_occurrence( Number of splits in which to divide the spatial coordinates in :attr:`anndata.AnnData.obsm` ``['{spatial_key}']``. %(parallelize)s + use_rust + Whether to use the rust implementation of the function. If ``True``, the rust implementation will be used. + If ``False``, the python implementation will. Returns ------- @@ -393,6 +515,12 @@ def co_occurrence( - :attr:`anndata.AnnData.uns` ``['{cluster_key}_co_occurrence']['interval']`` - the distance thresholds computed at ``interval``. """ + + + if use_rust and not find_spec("ststat_rs"): + ## issue an error and ask user to install ststat_rs + raise ImportError("ststat_rs not found. 
Please install ststat_rs to use the rust version of co-occurrence function: `pip install ststat-rs`") + if isinstance(adata, SpatialData): adata = adata.table _assert_categorical_obs(adata, key=cluster_key) @@ -401,11 +529,6 @@ def co_occurrence( spatial = adata.obsm[spatial_key].astype(fp) original_clust = adata.obs[cluster_key] - # annotate cluster idx - clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} - labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - labs_unique = np.array(list(clust_map.values()), dtype=ip) - # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -415,46 +538,32 @@ def co_occurrence( if len(interval) <= 1: raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") - n_obs = spatial.shape[0] - if n_splits is None: - size_arr = (n_obs**2 * spatial.itemsize) / 1024 / 1024 # calc expected mem usage - n_splits = 1 - if size_arr > 2000: - while (n_obs / n_splits) > 2048: - n_splits += 1 - logg.warning( - f"`n_splits` was automatically set to `{n_splits}` to " - f"prevent `{n_obs}x{n_obs}` distance matrix from being created" - ) - n_splits = int(max(min(n_splits, n_obs), 1)) - - # split array and labels - spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) - labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) - # create idx array including unique combinations and self-comparison - x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) - idx_splits = list(zip(x, y, strict=False)) - - n_jobs = _get_n_cores(n_jobs) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals " - f"`{len(idx_splits)}` split combinations using `{n_jobs}` core(s)" - ) - - out_lst = parallelize( - _co_occurrence_helper, - collection=idx_splits, - extractor=chain.from_iterable, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - )( - spatial_splits=spatial_splits, - labs_splits=labs_splits, - labs_unique=labs_unique, - interval=interval, - ) - out = list(out_lst)[0] if len(idx_splits) == 1 else sum(list(out_lst)) / len(idx_splits) + if use_rust: + out = _co_occurrence_rs( + spatial, + original_clust, + interval + ) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals" + ) + else: + # logg.info("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + print("Using python implementation of co-occurrence function. 
For faster computation, consider using the rust implementation by setting `use_rust=True`") + + out = _co_occurrence_origin( + spatial, + original_clust, + interval, + copy=copy, + n_splits=n_splits, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar + ) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" + ) if copy: logg.info("Finish", time=start) @@ -465,7 +574,7 @@ def co_occurrence( attr="uns", key=Key.uns.co_occurrence(cluster_key), data={"occ": out, "interval": interval}, - time=start, + time=start ) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 6752b1548..91502ba2f 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -130,6 +130,24 @@ def test_co_occurrence(adata: AnnData): assert arr.shape[2] == 49 assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] +def test_co_occurrence_rs(adata: AnnData): + """ + check co_occurrence score and shape (rust implementation) + """ + co_occurrence(adata, cluster_key="leiden", use_rust=True) + + # assert occurrence in adata.uns + assert "leiden_co_occurrence" in adata.uns.keys() + assert "occ" in adata.uns["leiden_co_occurrence"].keys() + assert "interval" in adata.uns["leiden_co_occurrence"].keys() + + # assert shapes + arr = adata.uns["leiden_co_occurrence"]["occ"] + assert arr.ndim == 3 + assert arr.shape[2] == 49 + assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] + + # @pytest.mark.parametrize(("ys", "xs"), [(10, 10), (None, None), (10, 20)]) @pytest.mark.parametrize(("n_jobs", "n_splits"), [(1, 2), (2, 2)]) @@ -155,6 +173,19 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 +@pytest.mark.parametrize("size", [1, 3]) +def test_co_occurrence_rs_explicit_interval(adata: AnnData, size: int): + minn, maxx = _find_min_max(adata.obsm[Key.obsm.spatial]) + interval = np.linspace(minn, maxx, size) + if size == 1: + with pytest.raises(ValueError, match=r"Expected interval to be of length"): + _ = co_occurrence(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) + else: + _, interval_1 = co_occurrence_rs(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) + + assert interval is not interval_1 + np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 + def test_use_raw(dummy_adata: AnnData): var_names = [str(i) for i in range(10)] From e51f5467ac867613c8103bdab06b71c9c17c402a Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Tue, 18 Mar 2025 22:11:44 +0100 Subject: [PATCH 02/18] misc: change rust-py deps --- pyproject.toml | 2 +- src/squidpy/gr/_ppatterns.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c97919602..6183a4577 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,7 @@ docs = [ "sphinx_copybutton>=0.5.0", ] co_occurrence = [ - "ststat-rs", + "scstat-rs", ] [project.urls] diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index ad72d384a..5104cce52 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -427,7 +427,7 @@ def _co_occurrence_rs( interval: NDArrayA ) -> NDArrayA | None: - from ststat_rs import co_occur_count + from scstat_rs import co_occur_count clust = 
original_clust.cat.codes.astype(np.int32) From a5b82265371c47f6f8293803f1b8cb986c168c01 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Tue, 18 Mar 2025 22:35:34 +0100 Subject: [PATCH 03/18] doc: improve the documentation --- src/squidpy/gr/_ppatterns.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5104cce52..601367c6f 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -274,6 +274,7 @@ def _score_helper( return score_perms +## The _occur_count() function is needed for the _co_occurrence_origin implementation @njit( ft[:, :, :](tt(it[:], 2), ft[:, :], it[:], ft[:], bl), parallel=False, @@ -395,6 +396,8 @@ def _co_occurrence_origin ( return out + +## The _co_occurrence_helper() function is needed for the _co_occurrence_origin implementation def _co_occurrence_helper( idx_splits: Iterable[tuple[int, int]], spatial_splits: Sequence[NDArrayA], @@ -501,8 +504,11 @@ def co_occurrence( :attr:`anndata.AnnData.obsm` ``['{spatial_key}']``. %(parallelize)s use_rust - Whether to use the rust implementation of the function. If ``True``, the rust implementation will be used. - If ``False``, the python implementation will. + Whether to use the rust implementation of the co-occurrence probability. + If ``True``, the rust implementation will be used. + If ``False``, the python implementation will be used. + For rust implementation, the `scstat-rs` package is required. + You can install it via `pip install scstat-rs`. Returns ------- @@ -517,9 +523,9 @@ def co_occurrence( """ - if use_rust and not find_spec("ststat_rs"): - ## issue an error and ask user to install ststat_rs - raise ImportError("ststat_rs not found. Please install ststat_rs to use the rust version of co-occurrence function: `pip install ststat-rs`") + if use_rust and not find_spec("scstat_rs"): + ## issue an error and ask user to install scstat_rs + raise ImportError("scstat_rs not found. 
Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`") if isinstance(adata, SpatialData): adata = adata.table From f7ff2936c9126d2b3f9464351189b62da4975496 Mon Sep 17 00:00:00 2001 From: Daniele Lucarelli <108922919+MDLDan@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:13:02 +0100 Subject: [PATCH 04/18] add python re-implementation --- src/squidpy/gr/_ppatterns.py | 301 +++++++++++++++++------------------ 1 file changed, 144 insertions(+), 157 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 601367c6f..3ebdd54de 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -12,7 +12,7 @@ import numpy as np import pandas as pd from anndata import AnnData -from numba import njit +from numba import njit, prange from numpy.random import default_rng from scanpy import logging as logg from scanpy.get import _get_obs_rep @@ -274,155 +274,147 @@ def _score_helper( return score_perms -## The _occur_count() function is needed for the _co_occurrence_origin implementation -@njit( - ft[:, :, :](tt(it[:], 2), ft[:, :], it[:], ft[:], bl), - parallel=False, - fastmath=True, -) +@njit(parallel=True, fastmath=True) def _occur_count( - clust: tuple[NDArrayA, NDArrayA], - pw_dist: NDArrayA, - labs_unique: NDArrayA, - interval: NDArrayA, - same_split: bool, -) -> NDArrayA: - num = labs_unique.shape[0] - out = np.zeros((num, num, interval.shape[0] - 1), dtype=ft) - - for idx in range(interval.shape[0] - 1): - co_occur = np.zeros((num, num), dtype=ft) - probs_con = np.zeros((num, num), dtype=ft) - - thres_max = interval[idx + 1] - clust_x, clust_y = clust - - # Modified to compute co-occurrence probability ratio over increasing radii sizes as opposed to discrete interval bins - # Need pw_dist > 0 to avoid counting a cell with itself as co-occurrence - idx_x, idx_y = np.nonzero((pw_dist <= thres_max) & (pw_dist > 0)) - x = clust_x[idx_x] - y = clust_y[idx_y] - # Treat computing co-occurrence using the same split and different splits differently - # Pairwise distance matrix for between the same split is symmetric and therefore only needs to be counted once - for i, j in zip(x, y): # noqa: B905 # cannot use strict=False because of numba - co_occur[i, j] += 1 - if not same_split: - co_occur[j, i] += 1 - - # Prevent divison by zero errors when we have low cell counts/small intervals - probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=ft) - probs = np.sum(probs_matrix, axis=0) - - for c in labs_unique: - probs_conditional = ( - co_occur[c] / np.sum(co_occur[c]) if np.sum(co_occur[c]) != 0 else np.zeros(num, dtype=ft) - ) - probs_con[c, :] = np.zeros(num, dtype=ft) - for i in range(num): - if probs[i] == 0: - probs_con[c, i] = 0 - else: - probs_con[c, i] = probs_conditional[i] / probs[i] - - out[:, :, idx] = probs_con - - return out - -def _co_occurrence_origin ( - spatial: NDArrayA, - original_clust: pd.Series, - interval: NDArrayA, - copy: bool = False, - n_splits: int | None = None, - n_jobs: int | None = None, - backend: str = "loky", - show_progress_bar: bool = True -) -> NDArrayA: - - # annotate cluster idx - clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} - labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - labs_unique = np.array(list(clust_map.values()), dtype=ip) - - # create intervals thresholds - if isinstance(interval, int): - thresh_min, thresh_max = _find_min_max(spatial) - interval = 
np.linspace(thresh_min, thresh_max, num=interval, dtype=fp) - else: - interval = np.array(sorted(interval), dtype=fp, copy=True) - if len(interval) <= 1: - raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") + spatial_x: NDArrayA, + spatial_y:NDArrayA, + thresholds: NDArrayA, + label_idx: NDArrayA, + n: int, + k: int, + l_val: int +)-> NDArrayA: + + # Allocate a 2D array to store a flat local result per point. + local_results = np.zeros((n, l_val * k * k), dtype=np.int32) + for i in prange(n): + local_counts = np.zeros(l_val * k * k, dtype=np.int32) + for j in range(n): + if i == j: + continue + dx = spatial_x[i] - spatial_x[j] + dy = spatial_y[i] - spatial_y[j] + dist_sq = dx * dx + dy * dy + for r in range(l_val): + thresh = thresholds[r] + if dist_sq <= thresh: + index = r * (k * k) + label_idx[i] * k + label_idx[j] + local_counts[index] += 1 + #else: + # break # If this threshold fails, smaller ones will also fail. + for m in range(l_val * k * k): + local_results[i, m] = local_counts[m] + + # Reduce over all points: sum the local counts. + result_flat = np.zeros(l_val * k * k, dtype=np.int32) + for m in range(l_val * k * k): + s = 0 + for i in range(n): + s += local_results[i, m] + result_flat[m] = s + + # Reshape to a 3D array with shape (interval, k, k) + counts = np.empty((l_val, k, k), dtype=np.int32) + for r in range(l_val): + for i in range(k): + for j in range(k): + counts[r, i, j] = result_flat[r * (k * k) + i * k + j] + + # Rearrange axes to match the original convention: (k, k, interval) + result = np.empty((k, k, l_val), dtype=np.int32) + for i in range(k): + for j in range(k): + for r in range(l_val): + result[i, j, r] = counts[r, i, j] + return result - n_obs = spatial.shape[0] - if n_splits is None: - size_arr = (n_obs**2 * spatial.itemsize) / 1024 / 1024 # calc expected mem usage - n_splits = 1 - if size_arr > 2000: - while (n_obs / n_splits) > 2048: - n_splits += 1 - logg.warning( - f"`n_splits` was automatically set to `{n_splits}` to " - f"prevent `{n_obs}x{n_obs}` distance matrix from being created" - ) - n_splits = int(max(min(n_splits, n_obs), 1)) - - # split array and labels - spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) - labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) - # create idx array including unique combinations and self-comparison - x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) - idx_splits = list(zip(x, y, strict=False)) - - n_jobs = _get_n_cores(n_jobs) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals " - f"`{len(idx_splits)}` split combinations using `{n_jobs}` core(s)" - ) - - out_lst = parallelize( - _co_occurrence_helper, - collection=idx_splits, - extractor=chain.from_iterable, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - )( - spatial_splits=spatial_splits, - labs_splits=labs_splits, - labs_unique=labs_unique, - interval=interval, - ) - out = list(out_lst)[0] if len(idx_splits) == 1 else sum(list(out_lst)) / len(idx_splits) - return out -## The _co_occurrence_helper() function is needed for the _co_occurrence_origin implementation def _co_occurrence_helper( - idx_splits: Iterable[tuple[int, int]], - spatial_splits: Sequence[NDArrayA], - labs_splits: Sequence[NDArrayA], - labs_unique: NDArrayA, - interval: NDArrayA, - queue: SigQueue | None = None, -) -> pd.DataFrame: - out_lst = [] - for t in idx_splits: - idx_x, idx_y = t - labs_x = 
labs_splits[idx_x] - labs_y = labs_splits[idx_y] - dist = pairwise_distances(spatial_splits[idx_x], spatial_splits[idx_y]) - - out = _occur_count((labs_x, labs_y), dist, labs_unique, interval, idx_x == idx_y) - out_lst.append(out) - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - return out_lst + v_x: NDArrayA, + v_y: NDArrayA, + v_radium: NDArrayA, + labs: NDArrayA +)-> NDArrayA: + """ + Fast co-occurrence probability computation using the new numba-accelerated counting. + + Parameters + ---------- + v_x : np.ndarray, float64 + x–coordinates. + v_y : np.ndarray, float64 + y–coordinates. + v_radium : np.ndarray, float64 + Distance thresholds (in ascending order). + labs : np.ndarray + Cluster labels (as integers). + + Returns + ------- + occ_prob : np.ndarray + A 3D array of shape (k, k, len(v_radium)-1) containing the co-occurrence probabilities. + labs_unique : np.ndarray + Array of unique labels. + """ + n = len(v_x) + labs_unique = np.unique(labs) + k = len(labs_unique) + # l_val is the number of bins; here we assume the thresholds come from v_radium[1:]. + l_val = len(v_radium) - 1 + # Compute squared thresholds from the interval (skip the first value) + thresholds = (v_radium[1:]) ** 2 + + # Compute cco-occurence ounts. + counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) + + # Compute co-occurrence probabilities for each threshold bin. + occ_prob = np.empty((k, k, l_val), dtype=np.float32) + for r in range(l_val): + co_occur = counts[:, :, r].astype(np.float32) + + # Compute the total count for this threshold. + total = 0.0 + for i in range(k): + for j in range(k): + total += co_occur[i, j] + + # Compute the normalized probability matrix. + probs_matrix = np.zeros((k, k), dtype=np.float32) + if total != 0.0: + for i in range(k): + for j in range(k): + probs_matrix[i, j] = co_occur[i, j] / total + + probs = np.zeros(k, dtype=np.float32) + for j in range(k): + s = 0.0 + for i in range(k): + s += probs_matrix[i, j] + probs[j] = s + + # Compute conditional probabilities. + probs_con = np.zeros((k, k), dtype=np.float32) + for c in range(k): + row_sum = 0.0 + for j in range(k): + row_sum += co_occur[c, j] + for i in range(k): + cond = 0.0 + if row_sum != 0.0: + cond = co_occur[c, i] / row_sum + if probs[i] == 0.0: + probs_con[c, i] = 0.0 + else: + probs_con[c, i] = cond / probs[i] + + # Transpose to match (k, k, interval). + for i in range(k): + for c in range(k): + occ_prob[i, c, r] = probs_con[c, i] + + return occ_prob def _co_occurrence_rs( spatial: NDArrayA, @@ -451,7 +443,6 @@ def _co_occurrence_rs( for i_interval in interval_seq: - # print(i_interval) co_occur = co_occur_3d[:, :, i_interval] probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=fp) @@ -472,7 +463,7 @@ def _co_occurrence_rs( # print(interval_seq.shape[0] - 1 - i_interval) out[:, :, interval_seq.shape[0] - 1 - i_interval] = probs_con - return out + return out, labs_unique @d.dedent def co_occurrence( @@ -534,7 +525,9 @@ def co_occurrence( spatial = adata.obsm[spatial_key].astype(fp) original_clust = adata.obs[cluster_key] - + clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} + labs = np.array([clust_map[c] for c in original_clust], dtype=ip) + # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -556,17 +549,11 @@ def co_occurrence( else: # logg.info("Using python implementation of co-occurrence function. 
For faster computation, consider using the rust implementation by setting `use_rust=True`") print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + spatial_x = spatial[:, 0] + spatial_y = spatial[:, 1] - out = _co_occurrence_origin( - spatial, - original_clust, - interval, - copy=copy, - n_splits=n_splits, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar - ) + # Compute co-occurrence probabilities using the fast numba routine. + out = _co_occurrence_helper(spatial_x, spatial_y, interval, labs) start = logg.info( f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" ) From 9af42521903c82021f9f253b23083b2def4efd82 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 09:50:26 +0000 Subject: [PATCH 05/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 92 ++++++++++++----------------------- tests/graph/test_ppatterns.py | 3 +- 2 files changed, 33 insertions(+), 62 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 3ebdd54de..e48d0671b 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -2,9 +2,8 @@ from __future__ import annotations -from importlib.util import find_spec - from collections.abc import Iterable, Sequence +from importlib.util import find_spec from itertools import chain from typing import TYPE_CHECKING, Any, Literal @@ -276,15 +275,8 @@ def _score_helper( @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y:NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, - l_val: int -)-> NDArrayA: - + spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int +) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. local_results = np.zeros((n, l_val * k * k), dtype=np.int32) for i in prange(n): @@ -300,11 +292,11 @@ def _occur_count( if dist_sq <= thresh: index = r * (k * k) + label_idx[i] * k + label_idx[j] local_counts[index] += 1 - #else: + # else: # break # If this threshold fails, smaller ones will also fail. for m in range(l_val * k * k): local_results[i, m] = local_counts[m] - + # Reduce over all points: sum the local counts. result_flat = np.zeros(l_val * k * k, dtype=np.int32) for m in range(l_val * k * k): @@ -319,7 +311,7 @@ def _occur_count( for i in range(k): for j in range(k): counts[r, i, j] = result_flat[r * (k * k) + i * k + j] - + # Rearrange axes to match the original convention: (k, k, interval) result = np.empty((k, k, l_val), dtype=np.int32) for i in range(k): @@ -329,17 +321,10 @@ def _occur_count( return result - - -def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, - labs: NDArrayA -)-> NDArrayA: +def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. - + Parameters ---------- v_x : np.ndarray, float64 @@ -350,7 +335,7 @@ def _co_occurrence_helper( Distance thresholds (in ascending order). labs : np.ndarray Cluster labels (as integers). 
- + Returns ------- occ_prob : np.ndarray @@ -365,35 +350,35 @@ def _co_occurrence_helper( l_val = len(v_radium) - 1 # Compute squared thresholds from the interval (skip the first value) thresholds = (v_radium[1:]) ** 2 - + # Compute cco-occurence ounts. counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) - + # Compute co-occurrence probabilities for each threshold bin. occ_prob = np.empty((k, k, l_val), dtype=np.float32) for r in range(l_val): co_occur = counts[:, :, r].astype(np.float32) - + # Compute the total count for this threshold. total = 0.0 for i in range(k): for j in range(k): total += co_occur[i, j] - + # Compute the normalized probability matrix. probs_matrix = np.zeros((k, k), dtype=np.float32) if total != 0.0: for i in range(k): for j in range(k): probs_matrix[i, j] = co_occur[i, j] / total - + probs = np.zeros(k, dtype=np.float32) for j in range(k): s = 0.0 for i in range(k): s += probs_matrix[i, j] probs[j] = s - + # Compute conditional probabilities. probs_con = np.zeros((k, k), dtype=np.float32) for c in range(k): @@ -408,29 +393,21 @@ def _co_occurrence_helper( probs_con[c, i] = 0.0 else: probs_con[c, i] = cond / probs[i] - + # Transpose to match (k, k, interval). for i in range(k): for c in range(k): occ_prob[i, c, r] = probs_con[c, i] - + return occ_prob -def _co_occurrence_rs( - spatial: NDArrayA, - original_clust: pd.Series, - interval: NDArrayA -) -> NDArrayA | None: +def _co_occurrence_rs(spatial: NDArrayA, original_clust: pd.Series, interval: NDArrayA) -> NDArrayA | None: from scstat_rs import co_occur_count clust = original_clust.cat.codes.astype(np.int32) - co_occur_3d = co_occur_count( - spatial[:, 0], - spatial[:, 1], - interval, clust - ) + co_occur_3d = co_occur_count(spatial[:, 0], spatial[:, 1], interval, clust) ## for each dimension # for i in range(co_occur.shape[2]): @@ -441,7 +418,6 @@ def _co_occurrence_rs( interval_seq = np.arange(interval.shape[0] - 2, -1, -1) interval_seq.shape[0] - for i_interval in interval_seq: co_occur = co_occur_3d[:, :, i_interval] @@ -465,6 +441,7 @@ def _co_occurrence_rs( return out, labs_unique + @d.dedent def co_occurrence( adata: AnnData | SpatialData, @@ -476,7 +453,7 @@ def co_occurrence( n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, - use_rust: bool = False + use_rust: bool = False, ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. @@ -513,10 +490,11 @@ def co_occurrence( computed at ``interval``. """ - if use_rust and not find_spec("scstat_rs"): ## issue an error and ask user to install scstat_rs - raise ImportError("scstat_rs not found. Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`") + raise ImportError( + "scstat_rs not found. 
Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`" + ) if isinstance(adata, SpatialData): adata = adata.table @@ -527,7 +505,7 @@ def co_occurrence( original_clust = adata.obs[cluster_key] clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - + # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -538,17 +516,13 @@ def co_occurrence( raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") if use_rust: - out = _co_occurrence_rs( - spatial, - original_clust, - interval - ) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals" - ) + out = _co_occurrence_rs(spatial, original_clust, interval) + start = logg.info(f"Calculating co-occurrence probabilities for `{len(interval)}` intervals") else: # logg.info("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") - print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") + print( + "Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`" + ) spatial_x = spatial[:, 0] spatial_y = spatial[:, 1] @@ -563,11 +537,7 @@ def co_occurrence( return out, interval _save_data( - adata, - attr="uns", - key=Key.uns.co_occurrence(cluster_key), - data={"occ": out, "interval": interval}, - time=start + adata, attr="uns", key=Key.uns.co_occurrence(cluster_key), data={"occ": out, "interval": interval}, time=start ) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 91502ba2f..997dcaf4f 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -130,6 +130,7 @@ def test_co_occurrence(adata: AnnData): assert arr.shape[2] == 49 assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] + def test_co_occurrence_rs(adata: AnnData): """ check co_occurrence score and shape (rust implementation) @@ -148,7 +149,6 @@ def test_co_occurrence_rs(adata: AnnData): assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] - # @pytest.mark.parametrize(("ys", "xs"), [(10, 10), (None, None), (10, 20)]) @pytest.mark.parametrize(("n_jobs", "n_splits"), [(1, 2), (2, 2)]) def test_co_occurrence_reproducibility(adata: AnnData, n_jobs: int, n_splits: int): @@ -173,6 +173,7 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 + @pytest.mark.parametrize("size", [1, 3]) def test_co_occurrence_rs_explicit_interval(adata: AnnData, size: int): minn, maxx = _find_min_max(adata.obsm[Key.obsm.spatial]) From 9457767553cd58919d656031667561ccb533c102 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Wed, 19 Mar 2025 11:01:33 +0100 Subject: [PATCH 06/18] Clean the tests and dependencies --- pyproject.toml | 3 - src/squidpy/gr/_ppatterns.py | 136 ++++++++-------------------------- tests/graph/test_ppatterns.py | 31 -------- 3 files changed, 31 insertions(+), 139 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6183a4577..1143cab94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,9 +102,6 @@ docs = [ 
"myst-nb>=0.17.1", "sphinx_copybutton>=0.5.0", ] -co_occurrence = [ - "scstat-rs", -] [project.urls] Homepage = "https://github.com/scverse/squidpy" diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 3ebdd54de..962fa0ca9 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -2,8 +2,6 @@ from __future__ import annotations -from importlib.util import find_spec - from collections.abc import Iterable, Sequence from itertools import chain from typing import TYPE_CHECKING, Any, Literal @@ -276,12 +274,12 @@ def _score_helper( @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y:NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, + spatial_x: NDArrayA, + spatial_y:NDArrayA, + thresholds: NDArrayA, + label_idx: NDArrayA, + n: int, + k: int, l_val: int )-> NDArrayA: @@ -304,7 +302,7 @@ def _occur_count( # break # If this threshold fails, smaller ones will also fail. for m in range(l_val * k * k): local_results[i, m] = local_counts[m] - + # Reduce over all points: sum the local counts. result_flat = np.zeros(l_val * k * k, dtype=np.int32) for m in range(l_val * k * k): @@ -319,7 +317,7 @@ def _occur_count( for i in range(k): for j in range(k): counts[r, i, j] = result_flat[r * (k * k) + i * k + j] - + # Rearrange axes to match the original convention: (k, k, interval) result = np.empty((k, k, l_val), dtype=np.int32) for i in range(k): @@ -332,14 +330,14 @@ def _occur_count( def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, + v_x: NDArrayA, + v_y: NDArrayA, + v_radium: NDArrayA, labs: NDArrayA )-> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. - + Parameters ---------- v_x : np.ndarray, float64 @@ -350,7 +348,7 @@ def _co_occurrence_helper( Distance thresholds (in ascending order). labs : np.ndarray Cluster labels (as integers). - + Returns ------- occ_prob : np.ndarray @@ -365,35 +363,35 @@ def _co_occurrence_helper( l_val = len(v_radium) - 1 # Compute squared thresholds from the interval (skip the first value) thresholds = (v_radium[1:]) ** 2 - + # Compute cco-occurence ounts. counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) - + # Compute co-occurrence probabilities for each threshold bin. occ_prob = np.empty((k, k, l_val), dtype=np.float32) for r in range(l_val): co_occur = counts[:, :, r].astype(np.float32) - + # Compute the total count for this threshold. total = 0.0 for i in range(k): for j in range(k): total += co_occur[i, j] - + # Compute the normalized probability matrix. probs_matrix = np.zeros((k, k), dtype=np.float32) if total != 0.0: for i in range(k): for j in range(k): probs_matrix[i, j] = co_occur[i, j] / total - + probs = np.zeros(k, dtype=np.float32) for j in range(k): s = 0.0 for i in range(k): s += probs_matrix[i, j] probs[j] = s - + # Compute conditional probabilities. probs_con = np.zeros((k, k), dtype=np.float32) for c in range(k): @@ -408,62 +406,13 @@ def _co_occurrence_helper( probs_con[c, i] = 0.0 else: probs_con[c, i] = cond / probs[i] - + # Transpose to match (k, k, interval). 
for i in range(k): for c in range(k): occ_prob[i, c, r] = probs_con[c, i] - - return occ_prob -def _co_occurrence_rs( - spatial: NDArrayA, - original_clust: pd.Series, - interval: NDArrayA -) -> NDArrayA | None: - - from scstat_rs import co_occur_count - - clust = original_clust.cat.codes.astype(np.int32) - - co_occur_3d = co_occur_count( - spatial[:, 0], - spatial[:, 1], - interval, clust - ) - - ## for each dimension - # for i in range(co_occur.shape[2]): - num = co_occur_3d.shape[0] - labs_unique = range(co_occur_3d.shape[0]) - out = np.zeros((num, num, interval.shape[0] - 1), dtype=fp) - - interval_seq = np.arange(interval.shape[0] - 2, -1, -1) - interval_seq.shape[0] - - - for i_interval in interval_seq: - co_occur = co_occur_3d[:, :, i_interval] - - probs_matrix = co_occur / np.sum(co_occur) if np.sum(co_occur) != 0 else np.zeros((num, num), dtype=fp) - probs = np.sum(probs_matrix, axis=0) - probs_con = np.zeros((num, num), dtype=fp) - - for c in labs_unique: - probs_conditional = ( - co_occur[c] / np.sum(co_occur[c]) if np.sum(co_occur[c]) != 0 else np.zeros(num, dtype=fp) - ) - probs_con[c, :] = np.zeros(num, dtype=fp) - for i in range(num): - if probs[i] == 0: - probs_con[c, i] = 0 - else: - probs_con[c, i] = probs_conditional[i] / probs[i] - - # print(interval_seq.shape[0] - 1 - i_interval) - out[:, :, interval_seq.shape[0] - 1 - i_interval] = probs_con - - return out, labs_unique + return occ_prob @d.dedent def co_occurrence( @@ -475,8 +424,7 @@ def co_occurrence( n_splits: int | None = None, n_jobs: int | None = None, backend: str = "loky", - show_progress_bar: bool = True, - use_rust: bool = False + show_progress_bar: bool = True ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. @@ -494,12 +442,6 @@ def co_occurrence( Number of splits in which to divide the spatial coordinates in :attr:`anndata.AnnData.obsm` ``['{spatial_key}']``. %(parallelize)s - use_rust - Whether to use the rust implementation of the co-occurrence probability. - If ``True``, the rust implementation will be used. - If ``False``, the python implementation will be used. - For rust implementation, the `scstat-rs` package is required. - You can install it via `pip install scstat-rs`. Returns ------- @@ -514,10 +456,6 @@ def co_occurrence( """ - if use_rust and not find_spec("scstat_rs"): - ## issue an error and ask user to install scstat_rs - raise ImportError("scstat_rs not found. Please install scstat_rs to use the rust version of co-occurrence function: `pip install scstat-rs`") - if isinstance(adata, SpatialData): adata = adata.table _assert_categorical_obs(adata, key=cluster_key) @@ -527,7 +465,7 @@ def co_occurrence( original_clust = adata.obs[cluster_key] clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} labs = np.array([clust_map[c] for c in original_clust], dtype=ip) - + # create intervals thresholds if isinstance(interval, int): thresh_min, thresh_max = _find_min_max(spatial) @@ -537,26 +475,14 @@ def co_occurrence( if len(interval) <= 1: raise ValueError(f"Expected interval to be of length `>= 2`, found `{len(interval)}`.") - if use_rust: - out = _co_occurrence_rs( - spatial, - original_clust, - interval - ) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals" - ) - else: - # logg.info("Using python implementation of co-occurrence function. 
For faster computation, consider using the rust implementation by setting `use_rust=True`") - print("Using python implementation of co-occurrence function. For faster computation, consider using the rust implementation by setting `use_rust=True`") - spatial_x = spatial[:, 0] - spatial_y = spatial[:, 1] - - # Compute co-occurrence probabilities using the fast numba routine. - out = _co_occurrence_helper(spatial_x, spatial_y, interval, labs) - start = logg.info( - f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" - ) + spatial_x = spatial[:, 0] + spatial_y = spatial[:, 1] + + # Compute co-occurrence probabilities using the fast numba routine. + out = _co_occurrence_helper(spatial_x, spatial_y, interval, labs) + start = logg.info( + f"Calculating co-occurrence probabilities for `{len(interval)}` intervals using `{n_jobs}` core(s) and `{n_splits}` splits" + ) if copy: logg.info("Finish", time=start) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 91502ba2f..6752b1548 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -130,24 +130,6 @@ def test_co_occurrence(adata: AnnData): assert arr.shape[2] == 49 assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] -def test_co_occurrence_rs(adata: AnnData): - """ - check co_occurrence score and shape (rust implementation) - """ - co_occurrence(adata, cluster_key="leiden", use_rust=True) - - # assert occurrence in adata.uns - assert "leiden_co_occurrence" in adata.uns.keys() - assert "occ" in adata.uns["leiden_co_occurrence"].keys() - assert "interval" in adata.uns["leiden_co_occurrence"].keys() - - # assert shapes - arr = adata.uns["leiden_co_occurrence"]["occ"] - assert arr.ndim == 3 - assert arr.shape[2] == 49 - assert arr.shape[1] == arr.shape[0] == adata.obs["leiden"].unique().shape[0] - - # @pytest.mark.parametrize(("ys", "xs"), [(10, 10), (None, None), (10, 20)]) @pytest.mark.parametrize(("n_jobs", "n_splits"), [(1, 2), (2, 2)]) @@ -173,19 +155,6 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 -@pytest.mark.parametrize("size", [1, 3]) -def test_co_occurrence_rs_explicit_interval(adata: AnnData, size: int): - minn, maxx = _find_min_max(adata.obsm[Key.obsm.spatial]) - interval = np.linspace(minn, maxx, size) - if size == 1: - with pytest.raises(ValueError, match=r"Expected interval to be of length"): - _ = co_occurrence(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) - else: - _, interval_1 = co_occurrence_rs(adata, cluster_key="leiden", copy=True, interval=interval, use_rust=True) - - assert interval is not interval_1 - np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 - def test_use_raw(dummy_adata: AnnData): var_names = [str(i) for i in range(10)] From 92f3da5a633020dbb81218452530590a96d383c1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:03:00 +0000 Subject: [PATCH 07/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 20 +++++--------------- tests/graph/test_ppatterns.py | 1 + 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 
36cb4e4ec..e6c992d58 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -275,14 +275,8 @@ def _score_helper( @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y:NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, - l_val: int -)-> NDArrayA: + spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int +) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. local_results = np.zeros((n, l_val * k * k), dtype=np.int32) for i in prange(n): @@ -327,12 +321,7 @@ def _occur_count( return result -def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, - labs: NDArrayA -)-> NDArrayA: +def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. @@ -412,6 +401,7 @@ def _co_occurrence_helper( return occ_prob + @d.dedent def co_occurrence( adata: AnnData | SpatialData, @@ -422,7 +412,7 @@ def co_occurrence( n_splits: int | None = None, n_jobs: int | None = None, backend: str = "loky", - show_progress_bar: bool = True + show_progress_bar: bool = True, ) -> tuple[NDArrayA, NDArrayA] | None: """ Compute co-occurrence probability of clusters. diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index c0b6b45fe..6752b1548 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -155,6 +155,7 @@ def test_co_occurrence_explicit_interval(adata: AnnData, size: int): assert interval is not interval_1 np.testing.assert_allclose(interval, interval_1) # allclose because in the func, we use f32 + def test_use_raw(dummy_adata: AnnData): var_names = [str(i) for i in range(10)] raw = dummy_adata[:, dummy_adata.var_names[: len(var_names)]].copy() From 52a5fae7e8ae040b1828a2733e25f26858172eea Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 14:44:51 +0200 Subject: [PATCH 08/18] Optimize memory access pattern & cache kernel --- src/squidpy/gr/_ppatterns.py | 62 ++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5ada2a694..9a18bbaf5 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -266,54 +266,46 @@ def _score_helper( return score_perms - @njit(parallel=True, fastmath=True) def _occur_count( - spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int + spatial_x: NDArrayA, + spatial_y: NDArrayA, + thresholds: NDArrayA, + label_idx: NDArrayA, + n: int, + k: int, + l_val: int ) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. - local_results = np.zeros((n, l_val * k * k), dtype=np.int32) + k2 = k * k + local_results = np.zeros((n, l_val * k2), dtype=np.int32) + for i in prange(n): - local_counts = np.zeros(l_val * k * k, dtype=np.int32) + local_counts = np.zeros(l_val * k2, dtype=np.int32) + for j in range(n): if i == j: continue dx = spatial_x[i] - spatial_x[j] dy = spatial_y[i] - spatial_y[j] - dist_sq = dx * dx + dy * dy - for r in range(l_val): - thresh = thresholds[r] - if dist_sq <= thresh: - index = r * (k * k) + label_idx[i] * k + label_idx[j] - local_counts[index] += 1 - # else: - # break # If this threshold fails, smaller ones will also fail. 
- for m in range(l_val * k * k): - local_results[i, m] = local_counts[m] - - # Reduce over all points: sum the local counts. - result_flat = np.zeros(l_val * k * k, dtype=np.int32) - for m in range(l_val * k * k): - s = 0 - for i in range(n): - s += local_results[i, m] - result_flat[m] = s - - # Reshape to a 3D array with shape (interval, k, k) - counts = np.empty((l_val, k, k), dtype=np.int32) - for r in range(l_val): - for i in range(k): - for j in range(k): - counts[r, i, j] = result_flat[r * (k * k) + i * k + j] + d2 = dx * dx + dy * dy + + pair = label_idx[i] * k + label_idx[j] # fixed in r–loop + base = pair * l_val # first cell for that pair - # Rearrange axes to match the original convention: (k, k, interval) - result = np.empty((k, k, l_val), dtype=np.int32) - for i in range(k): - for j in range(k): for r in range(l_val): - result[i, j, r] = counts[r, i, j] - return result + if d2 <= thresholds[r]: + local_counts[base + r] += 1 + local_results[i, :] = local_counts + # for m in range(l_val * k * k): + # local_results[i, m] = local_counts[m] + + # reduction and reshape stay the same + result_flat = local_results.sum(axis=0) + result = result_flat.reshape(k, k, l_val) + + return result def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ From cee646ae2d72f8d7f685058c836f7afded835789 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 15:06:41 +0200 Subject: [PATCH 09/18] jit the outer function and parallelize --- src/squidpy/gr/_ppatterns.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 9a18bbaf5..cdc882bd8 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -266,7 +266,7 @@ def _score_helper( return score_perms -@njit(parallel=True, fastmath=True) +@njit(parallel=True, fastmath=True, cache=True) def _occur_count( spatial_x: NDArrayA, spatial_y: NDArrayA, @@ -296,10 +296,8 @@ def _occur_count( for r in range(l_val): if d2 <= thresholds[r]: local_counts[base + r] += 1 - local_results[i, :] = local_counts - # for m in range(l_val * k * k): - # local_results[i, m] = local_counts[m] + local_results[i, :] = local_counts # reduction and reshape stay the same result_flat = local_results.sum(axis=0) @@ -307,7 +305,12 @@ def _occur_count( return result -def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: +@njit(parallel=True, fastmath=True, cache=True) +def _co_occurrence_helper( + v_x: NDArrayA, + v_y: NDArrayA, + v_radium: NDArrayA, + labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. @@ -342,7 +345,7 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs # Compute co-occurrence probabilities for each threshold bin. occ_prob = np.empty((k, k, l_val), dtype=np.float32) - for r in range(l_val): + for r in prange(l_val): co_occur = counts[:, :, r].astype(np.float32) # Compute the total count for this threshold. 
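For reference (illustrative only, not part of any commit in this series): the flat-index layout used by the optimized `_occur_count` kernel above — `base = (label_idx[i] * k + label_idx[j]) * l_val`, offset by the threshold index `r` — amounts to counting, for every ordered label pair and every squared threshold, the pairs `i != j` within that distance. A dense NumPy sketch of the same quantity, useful only as a cross-check on small inputs (the function name and arguments are assumed, not patch code):

```python
import numpy as np


def occur_count_reference(x, y, thresholds_sq, labels, k):
    """O(n^2) reference: counts[a, b, r] = number of ordered pairs (i, j), i != j,
    whose labels are (a, b) and whose squared distance is <= thresholds_sq[r]."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    labels = np.asarray(labels)
    d2 = (x[:, None] - x[None, :]) ** 2 + (y[:, None] - y[None, :]) ** 2
    np.fill_diagonal(d2, np.inf)  # the kernel skips i == j
    counts = np.zeros((k, k, len(thresholds_sq)), dtype=np.int64)
    for r, thr in enumerate(thresholds_sq):
        within = d2 <= thr
        for a in range(k):
            for b in range(k):
                counts[a, b, r] = np.count_nonzero(within[np.ix_(labels == a, labels == b)])
    return counts
```

Because pairs are ordered, each unordered cross-label pair contributes once to `[a, b]` and once to `[b, a]`, matching the kernel's convention.
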
From 05dd724cb166137e525db3fa5a26ae7d4463a4be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Jun 2025 13:08:08 +0000 Subject: [PATCH 10/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index cdc882bd8..873b757b6 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -266,15 +266,10 @@ def _score_helper( return score_perms + @njit(parallel=True, fastmath=True, cache=True) def _occur_count( - spatial_x: NDArrayA, - spatial_y: NDArrayA, - thresholds: NDArrayA, - label_idx: NDArrayA, - n: int, - k: int, - l_val: int + spatial_x: NDArrayA, spatial_y: NDArrayA, thresholds: NDArrayA, label_idx: NDArrayA, n: int, k: int, l_val: int ) -> NDArrayA: # Allocate a 2D array to store a flat local result per point. k2 = k * k @@ -290,8 +285,8 @@ def _occur_count( dy = spatial_y[i] - spatial_y[j] d2 = dx * dx + dy * dy - pair = label_idx[i] * k + label_idx[j] # fixed in r–loop - base = pair * l_val # first cell for that pair + pair = label_idx[i] * k + label_idx[j] # fixed in r–loop + base = pair * l_val # first cell for that pair for r in range(l_val): if d2 <= thresholds[r]: @@ -305,12 +300,9 @@ def _occur_count( return result + @njit(parallel=True, fastmath=True, cache=True) -def _co_occurrence_helper( - v_x: NDArrayA, - v_y: NDArrayA, - v_radium: NDArrayA, - labs: NDArrayA) -> NDArrayA: +def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs: NDArrayA) -> NDArrayA: """ Fast co-occurrence probability computation using the new numba-accelerated counting. 
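For reference (illustrative only, not part of the series): for a single threshold bin, the nested loops in `_co_occurrence_helper` compute the conditional ratio `P(label i | neighbour of label c) / P(label i)` from the raw counts. A vectorized sketch of that normalization (the function name is assumed), which can serve as a sanity check against one bin of the kernel output:

```python
import numpy as np


def cond_prob_reference(co_occur):
    """Single-bin equivalent of the probs_con loops: out[i, c] = P(i | c) / P(i)."""
    co_occur = co_occur.astype(np.float64)
    total = co_occur.sum()
    probs = co_occur.sum(axis=0) / total if total else np.zeros(co_occur.shape[0])
    row_sums = co_occur.sum(axis=1, keepdims=True)
    cond = np.divide(co_occur, row_sums, out=np.zeros_like(co_occur), where=row_sums != 0)
    out = np.divide(cond, probs, out=np.zeros_like(cond), where=probs != 0)
    return out.T  # transposed to the (i, c) layout written into occ_prob
```
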
From c490d45d57ccea292c7b5848f3958a9994f4cdc7 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 15:16:56 +0200 Subject: [PATCH 11/18] Fix Mypy checking Typing error --- src/squidpy/gr/_ppatterns.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index cdc882bd8..cbcf700a3 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -5,7 +5,8 @@ from collections.abc import Iterable, Sequence from importlib.util import find_spec from itertools import chain -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, cast +from numpy.typing import NDArray import numba.types as nt import numpy as np @@ -303,7 +304,7 @@ def _occur_count( result_flat = local_results.sum(axis=0) result = result_flat.reshape(k, k, l_val) - return result + return cast(NDArray[np.int32], result) @njit(parallel=True, fastmath=True, cache=True) def _co_occurrence_helper( From d0884ca02df45b1df92db4357b21c827097119bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Jun 2025 13:19:46 +0000 Subject: [PATCH 12/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/gr/_ppatterns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5d2993e54..244471352 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -6,7 +6,6 @@ from importlib.util import find_spec from itertools import chain from typing import TYPE_CHECKING, Any, Literal, cast -from numpy.typing import NDArray import numba.types as nt import numpy as np @@ -14,6 +13,7 @@ from anndata import AnnData from numba import njit, prange from numpy.random import default_rng +from numpy.typing import NDArray from scanpy import logging as logg from scanpy.get import _get_obs_rep from scanpy.metrics._gearys_c import _gearys_c From 6e5d1df6e584328f05c6db8cf25ceea6e1b01397 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 15:47:53 +0200 Subject: [PATCH 13/18] Disable the cast typing in jit --- src/squidpy/gr/_ppatterns.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5d2993e54..3db050c50 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -5,8 +5,7 @@ from collections.abc import Iterable, Sequence from importlib.util import find_spec from itertools import chain -from typing import TYPE_CHECKING, Any, Literal, cast -from numpy.typing import NDArray +from typing import TYPE_CHECKING, Any, Literal import numba.types as nt import numpy as np @@ -297,9 +296,7 @@ def _occur_count( # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result = result_flat.reshape(k, k, l_val) - - return cast(NDArray[np.int32], result) + return result_flat.reshape(k, k, l_val).copy() @njit(parallel=True, fastmath=True, cache=True) @@ -337,9 +334,9 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs counts = _occur_count(v_x, v_y, thresholds, labs, n, k, l_val) # Compute co-occurrence probabilities for each threshold bin. 
- occ_prob = np.empty((k, k, l_val), dtype=np.float32) + occ_prob = np.empty((k, k, l_val), dtype=np.float64) for r in prange(l_val): - co_occur = counts[:, :, r].astype(np.float32) + co_occur = counts[:, :, r].astype(np.float64) # Compute the total count for this threshold. total = 0.0 @@ -348,7 +345,7 @@ def _co_occurrence_helper(v_x: NDArrayA, v_y: NDArrayA, v_radium: NDArrayA, labs total += co_occur[i, j] # Compute the normalized probability matrix. - probs_matrix = np.zeros((k, k), dtype=np.float32) + probs_matrix = np.zeros((k, k), dtype=np.float64) if total != 0.0: for i in range(k): for j in range(k): From a62128b3d2d84731ae07ddf76eb401a2d42bf4a5 Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Thu, 12 Jun 2025 16:20:02 +0200 Subject: [PATCH 14/18] Try fix typing check error by mypy --- src/squidpy/gr/_ppatterns.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 3db050c50..19a3e78fb 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -296,7 +296,9 @@ def _occur_count( # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - return result_flat.reshape(k, k, l_val).copy() + result = result_flat.reshape(k, k, l_val).astype(np.int32) + + return result @njit(parallel=True, fastmath=True, cache=True) From 0aab85671732ce0f1ddb89b11942122bafb8037a Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Sat, 14 Jun 2025 20:58:20 +0200 Subject: [PATCH 15/18] Try: fix typing check error by mypy --- src/squidpy/gr/_ppatterns.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 19a3e78fb..17b1b5578 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -276,7 +276,7 @@ def _occur_count( local_results = np.zeros((n, l_val * k2), dtype=np.int32) for i in prange(n): - local_counts = np.zeros(l_val * k2, dtype=np.int32) + local_counts: NDArrayA = np.zeros(l_val * k2, dtype=np.int32) for j in range(n): if i == j: @@ -292,11 +292,11 @@ def _occur_count( if d2 <= thresholds[r]: local_counts[base + r] += 1 - local_results[i, :] = local_counts + local_results[i] = local_counts # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result = result_flat.reshape(k, k, l_val).astype(np.int32) + result = result_flat.reshape(k, k, l_val).copy() return result From 9ac269392846ecc043c81085dda9160ba16d162d Mon Sep 17 00:00:00 2001 From: Wenjie SUN Date: Sat, 14 Jun 2025 21:02:28 +0200 Subject: [PATCH 16/18] Try: fix typing check error by mypy --- src/squidpy/gr/_ppatterns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 17b1b5578..6e3230be0 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -296,7 +296,7 @@ def _occur_count( # reduction and reshape stay the same result_flat = local_results.sum(axis=0) - result = result_flat.reshape(k, k, l_val).copy() + result: NDArrayA = result_flat.reshape(k, k, l_val).copy() return result From 482d0d8a67f0609a50bcacc838ce34990984a57b Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Thu, 10 Jul 2025 15:07:10 +0200 Subject: [PATCH 17/18] init for benchmark --- .github/workflows/benchmark.yml | 62 +++++++ benchmarks/.asv/results/benchmarks.json | 27 +++ benchmarks/.asv/results/mac/machine.json | 9 + benchmarks/README.md | 21 +++ benchmarks/asv.conf.json | 171 ++++++++++++++++++ 
benchmarks/benchmarks/__init__.py | 1 + benchmarks/benchmarks/_utils.py | 216 +++++++++++++++++++++++ benchmarks/benchmarks/co_occurence.py | 34 ++++ 8 files changed, 541 insertions(+) create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmarks/.asv/results/benchmarks.json create mode 100644 benchmarks/.asv/results/mac/machine.json create mode 100644 benchmarks/README.md create mode 100644 benchmarks/asv.conf.json create mode 100644 benchmarks/benchmarks/__init__.py create mode 100644 benchmarks/benchmarks/_utils.py create mode 100644 benchmarks/benchmarks/co_occurence.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 000000000..1e952440b --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,62 @@ +name: Benchmark + +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + FORCE_COLOR: "1" + +defaults: + run: + shell: bash -e {0} # -e to fail on error + +jobs: + benchmark: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python: ["3.13"] + os: [ubuntu-latest] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python }} + ASV_DIR: "./benchmarks" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch main branch for `asv run`’s hash + run: git fetch origin main:main + if: ${{ github.ref_name != 'main' }} + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: 'pip' + + - name: Cache datasets + uses: actions/cache@v4 + with: + path: | + ~/.cache + key: benchmark-state-${{ hashFiles('benchmarks/**') }} + + - name: Install dependencies + run: pip install 'asv>=0.6.4' + + - name: Configure ASV + working-directory: ${{ env.ASV_DIR }} + run: asv machine --yes + + - name: Quick benchmark run + working-directory: ${{ env.ASV_DIR }} + run: asv run --dry-run --quick --show-stderr --verbose HEAD^! 
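For contributors, the quick smoke test performed by this workflow can be reproduced locally (assuming `asv>=0.6.4` is installed) with roughly the following commands from the repository root:

    cd benchmarks
    asv machine --yes
    asv run --dry-run --quick --show-stderr --verbose HEAD^!

The `--quick` flag runs each benchmark function only once and `--dry-run` skips saving results, so this only verifies that the suite imports and executes; it does not produce publishable timings.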
diff --git a/benchmarks/.asv/results/benchmarks.json b/benchmarks/.asv/results/benchmarks.json new file mode 100644 index 000000000..708d1c7d3 --- /dev/null +++ b/benchmarks/.asv/results/benchmarks.json @@ -0,0 +1,27 @@ +{ + "co_occurence.peakmem_co_occurrence": { + "code": "def peakmem_co_occurrence():\n sq.gr.co_occurrence(adata, cluster_key=\"cell type\")\n\ndef setup():\n global adata # noqa: PLW0603\n adata = sq.datasets.imc()", + "name": "co_occurence.peakmem_co_occurrence", + "param_names": [], + "params": [], + "type": "peakmemory", + "unit": "bytes", + "version": "8e588751fbcc15d56cfe73dbb3844752d6cac545c868bca1ce1045f0a54e301d" + }, + "co_occurence.time_co_occurrence": { + "code": "def time_co_occurrence():\n sq.gr.co_occurrence(adata, cluster_key=\"cell type\")\n\ndef setup():\n global adata # noqa: PLW0603\n adata = sq.datasets.imc()", + "min_run_count": 2, + "name": "co_occurence.time_co_occurrence", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "e764c1b999e315fffac8cac6708ede350ff254bd0b05a9be580a43ea3689d614", + "warmup_time": -1 + }, + "version": 2 +} \ No newline at end of file diff --git a/benchmarks/.asv/results/mac/machine.json b/benchmarks/.asv/results/mac/machine.json new file mode 100644 index 000000000..bcb50937c --- /dev/null +++ b/benchmarks/.asv/results/mac/machine.json @@ -0,0 +1,9 @@ +{ + "arch": "arm64", + "cpu": "Apple M1", + "machine": "mac", + "num_cpu": "8", + "os": "Darwin 23.1.0", + "ram": "17179869184", + "version": 1 +} \ No newline at end of file diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..3546e79ea --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,21 @@ +# Scanpy Benchmarks + +This directory contains code for benchmarking Scanpy using [asv][]. + +The functionality is checked using the [`benchmark.yml`][] workflow. +Benchmarks are run using the [benchmark bot][]. + +[asv]: https://asv.readthedocs.io/ +[`benchmark.yml`]: ../.github/workflows/benchmark.yml +[benchmark bot]: https://github.com/apps/scverse-benchmark + +## Data processing in benchmarks + +Each dataset is processed so it has + +- `.layers['counts']` (containing data in C/row-major format) and `.layers['counts-off-axis']` (containing data in FORTRAN/column-major format) +- `.X` and `.layers['off-axis']` with log-transformed data (formats like above) +- a `.var['mt']` boolean column indicating mitochondrial genes + +The benchmarks are set up so the `layer` parameter indicates the layer that will be moved into `.X` before the benchmark. +That way, we don’t need to add `layer=layer` everywhere. diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 000000000..907a6292e --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,171 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "squidpy", + + // The project's homepage + "project_url": "https://squidpy.readthedocs.io/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "..", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // Customizable commands for building, installing, and + // uninstalling the project. 
See asv.conf.json documentation. + // + // "install_command": ["python -mpip install {wheel_file}"], + // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + "build_command": [ + "python -m pip install build", + "python -m build --wheel -o {build_cache_dir} {build_dir}", + ], + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], // for git + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/scverse/squidpy/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["3.11", "3.13"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + "conda_channels": ["conda-forge", "defaults"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + "matrix": { + "numpy": [""], + // "scipy": ["1.2", ""], + "scipy": [""], + "h5py": [""], + "natsort": [""], + "pandas": [""], + "memory_profiler": [""], + "zarr": ["2.18.4"], + "pytest": [""], + "scanpy": [""], + "python-igraph": [""], + // "psutil": [""] + "pooch": [""], + "scikit-image": [""], + // "scikit-misc": [""], + }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. 
+ // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "six": null}, // don't run without six on conda + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "numpy": "1.8"}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": ".asv/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache results of the recent builds in each + // environment, making them faster to install next time. This is + // the number of builds to keep, per environment. + // "build_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // }, + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. 
+ // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 000000000..255d98fe4 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1 @@ +"""ASV benchmark suite for sqidpy.""" diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py new file mode 100644 index 000000000..e27e7e70d --- /dev/null +++ b/benchmarks/benchmarks/_utils.py @@ -0,0 +1,216 @@ +from __future__ import annotations + +import itertools +import warnings +from functools import cache +from typing import TYPE_CHECKING + +import numpy as np +import pooch +from anndata import concat +from asv_runner.benchmarks.mark import skip_for_params +import squidpy as sq + +import scanpy as sc +from scanpy._compat import CSRBase + +if TYPE_CHECKING: + from collections.abc import Callable, Sequence + from collections.abc import Set as AbstractSet + from typing import Literal, Protocol, TypeVar + + from anndata import AnnData + + from scanpy._compat import CSCBase + + C = TypeVar("C", bound=Callable) + + class ParamSkipper(Protocol): + def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... + + Dataset = Literal["pbmc68k_reduced", "pbmc3k", "bmmc", "lung93k"] + KeyX = Literal[None, "off-axis"] + KeyCount = Literal["counts", "counts-off-axis"] + + + + +@cache +def _pbmc68k_reduced() -> AnnData: + """A small datasets with a dense `.X`.""" # noqa: D401 + adata = sc.datasets.pbmc68k_reduced() + assert isinstance(adata.X, np.ndarray) + assert not np.isfortran(adata.X) + + # raw has the same number of genes, so we can use it for counts + # it doesn’t actually contain counts for some reason, but close enough + assert isinstance(adata.raw.X, CSRBase) + adata.layers["counts"] = adata.raw.X.toarray(order="C") + mapper = dict( + percent_mito="pct_counts_mt", + n_counts="total_counts", + ) + adata.obs.rename(columns=mapper, inplace=True) + return adata + + +def pbmc68k_reduced() -> AnnData: + return _pbmc68k_reduced().copy() + + +@cache +def _pbmc3k() -> AnnData: + adata = sc.datasets.pbmc3k() + assert isinstance(adata.X, CSRBase) + adata.layers["counts"] = adata.X.astype(np.int32, copy=True) + sc.pp.log1p(adata) + return adata + + +def pbmc3k() -> AnnData: + return _pbmc3k().copy() + + +@cache +def _bmmc(n_obs: int = 4000) -> AnnData: + registry = pooch.create( + path=pooch.os_cache("pooch"), + base_url="doi:10.6084/m9.figshare.22716739.v1/", + ) + registry.load_registry_from_doi() + samples = {smp: f"{smp}_filtered_feature_bc_matrix.h5" for smp in ("s1d1", "s1d3")} + adatas = {} + + for sample_id, filename in samples.items(): + path = registry.fetch(filename) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", r"Variable names are not unique") + sample_adata = sc.read_10x_h5(path) + sample_adata.var_names_make_unique() + sc.pp.subsample(sample_adata, n_obs=n_obs // len(samples)) + adatas[sample_id] = sample_adata + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", r"Observation names are not unique") + adata = concat(adatas, label="sample") + adata.obs_names_make_unique() + + assert isinstance(adata.X, CSRBase) + adata.layers["counts"] = adata.X.astype(np.int32, copy=True) + sc.pp.log1p(adata) + adata.obs["n_counts"] = adata.layers["counts"].sum(axis=1).A1 + return adata + + +def bmmc(n_obs: int = 400) -> AnnData: + return _bmmc(n_obs).copy() + + +@cache +def _lung93k() -> 
AnnData: + path = pooch.retrieve( + url="https://figshare.com/ndownloader/files/45788454", + known_hash="md5:4f28af5ff226052443e7e0b39f3f9212", + ) + adata = sc.read_h5ad(path) + assert isinstance(adata.X, CSRBase) + adata.layers["counts"] = adata.X.astype(np.int32, copy=True) + sc.pp.log1p(adata) + return adata + + +def lung93k() -> AnnData: + return _lung93k().copy() + + +def to_off_axis(x: np.ndarray | CSRBase) -> np.ndarray | CSCBase: + if isinstance(x, CSRBase): + return x.tocsc() + if isinstance(x, np.ndarray): + assert not np.isfortran(x) + return x.copy(order="F") + msg = f"Unexpected type {type(x)}" + raise TypeError(msg) + + +def _get_dataset_raw(dataset: Dataset) -> tuple[AnnData, str | None]: + match dataset: + case "pbmc68k_reduced": + adata, batch_key = pbmc68k_reduced(), None + case "pbmc3k": + adata, batch_key = pbmc3k(), None # can’t use this with batches + case "bmmc": + # TODO: allow specifying bigger variant + adata, batch_key = bmmc(400), "sample" + case "lung93k": + adata, batch_key = lung93k(), "PatientNumber" + case _: + msg = f"Unknown dataset {dataset}" + raise AssertionError(msg) + + # add off-axis layers + adata.layers["off-axis"] = to_off_axis(adata.X) + adata.layers["counts-off-axis"] = to_off_axis(adata.layers["counts"]) + + # add mitochondrial gene and pre-compute qc metrics + adata.var["mt"] = adata.var_names.str.startswith("MT-") + assert adata.var["mt"].sum() > 0, "no MT genes in dataset" + sc.pp.calculate_qc_metrics( + adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True + ) + + return adata, batch_key + + +def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | None]: + adata, batch_key = _get_dataset_raw(dataset) + if layer is not None: + adata.X = adata.layers.pop(layer) + return adata, batch_key + + +def get_count_dataset( + dataset: Dataset, *, layer: KeyCount = "counts" +) -> tuple[AnnData, str | None]: + adata, batch_key = _get_dataset_raw(dataset) + + adata.X = adata.layers.pop(layer) + # remove indicators that X was transformed + adata.uns.pop("log1p", None) + + return adata, batch_key + + +def param_skipper( + param_names: Sequence[str], params: tuple[Sequence[object], ...] +) -> ParamSkipper: + """Create a decorator that will skip all combinations that contain any of the given parameters. + + Examples + -------- + >>> param_names = ["letters", "numbers"] + >>> params = [["a", "b"], [3, 4, 5]] + >>> skip_when = param_skipper(param_names, params) + + >>> @skip_when(letters={"a"}, numbers={3}) + ... def func(a, b): + ... print(a, b) + >>> run_as_asv_benchmark(func) + b 4 + b 5 + + """ + + def skip(**skipped: AbstractSet) -> Callable[[C], C]: + skipped_combs = [ + tuple(record.values()) + for record in ( + dict(zip(param_names, vals, strict=True)) + for vals in itertools.product(*params) + ) + if any(v in skipped.get(n, set()) for n, v in record.items()) + ] + # print(skipped_combs, file=sys.stderr) + return skip_for_params(skipped_combs) + + return skip diff --git a/benchmarks/benchmarks/co_occurence.py b/benchmarks/benchmarks/co_occurence.py new file mode 100644 index 000000000..dd62144ff --- /dev/null +++ b/benchmarks/benchmarks/co_occurence.py @@ -0,0 +1,34 @@ +"""Benchmark tool operations in Squidpy. + +API documentation: . 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import scanpy as sc +import squidpy as sq + +from ._utils import pbmc68k_reduced + +if TYPE_CHECKING: + from anndata import AnnData + +# setup variables + +adata: AnnData + + +def setup(): + global adata # noqa: PLW0603 + adata = sq.datasets.imc() + + +def time_co_occurrence(): + sq.gr.co_occurrence(adata, cluster_key="cell type") + + +def peakmem_co_occurrence(): + sq.gr.co_occurrence(adata, cluster_key="cell type") + From 18a767cd160d8580bdac990878bcd15ced724902 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Thu, 10 Jul 2025 15:14:07 +0200 Subject: [PATCH 18/18] add the array order and stuff later. Also sparse vs dense later maybe --- benchmarks/README.md | 7 +- benchmarks/asv.conf.json | 5 +- benchmarks/benchmarks/_utils.py | 216 -------------------------- benchmarks/benchmarks/co_occurence.py | 2 - 4 files changed, 7 insertions(+), 223 deletions(-) delete mode 100644 benchmarks/benchmarks/_utils.py diff --git a/benchmarks/README.md b/benchmarks/README.md index 3546e79ea..8a0bf5270 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,16 +1,17 @@ -# Scanpy Benchmarks +# Squidpy Benchmarks -This directory contains code for benchmarking Scanpy using [asv][]. +This directory contains code for benchmarking Squidpy using [asv][]. The functionality is checked using the [`benchmark.yml`][] workflow. Benchmarks are run using the [benchmark bot][]. [asv]: https://asv.readthedocs.io/ [`benchmark.yml`]: ../.github/workflows/benchmark.yml -[benchmark bot]: https://github.com/apps/scverse-benchmark +[benchmark bot]: https://github.com/apps/scverse-benchmark # TODO ## Data processing in benchmarks +# TODO Each dataset is processed so it has - `.layers['counts']` (containing data in C/row-major format) and `.layers['counts-off-axis']` (containing data in FORTRAN/column-major format) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index 907a6292e..c48b03c81 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -78,9 +78,10 @@ "natsort": [""], "pandas": [""], "memory_profiler": [""], - "zarr": ["2.18.4"], + // "zarr": ["2.18.4"], "pytest": [""], - "scanpy": [""], + // "scanpy": [""], + "squidpy": [""], "python-igraph": [""], // "psutil": [""] "pooch": [""], diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py deleted file mode 100644 index e27e7e70d..000000000 --- a/benchmarks/benchmarks/_utils.py +++ /dev/null @@ -1,216 +0,0 @@ -from __future__ import annotations - -import itertools -import warnings -from functools import cache -from typing import TYPE_CHECKING - -import numpy as np -import pooch -from anndata import concat -from asv_runner.benchmarks.mark import skip_for_params -import squidpy as sq - -import scanpy as sc -from scanpy._compat import CSRBase - -if TYPE_CHECKING: - from collections.abc import Callable, Sequence - from collections.abc import Set as AbstractSet - from typing import Literal, Protocol, TypeVar - - from anndata import AnnData - - from scanpy._compat import CSCBase - - C = TypeVar("C", bound=Callable) - - class ParamSkipper(Protocol): - def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... 
- - Dataset = Literal["pbmc68k_reduced", "pbmc3k", "bmmc", "lung93k"] - KeyX = Literal[None, "off-axis"] - KeyCount = Literal["counts", "counts-off-axis"] - - - - -@cache -def _pbmc68k_reduced() -> AnnData: - """A small datasets with a dense `.X`.""" # noqa: D401 - adata = sc.datasets.pbmc68k_reduced() - assert isinstance(adata.X, np.ndarray) - assert not np.isfortran(adata.X) - - # raw has the same number of genes, so we can use it for counts - # it doesn’t actually contain counts for some reason, but close enough - assert isinstance(adata.raw.X, CSRBase) - adata.layers["counts"] = adata.raw.X.toarray(order="C") - mapper = dict( - percent_mito="pct_counts_mt", - n_counts="total_counts", - ) - adata.obs.rename(columns=mapper, inplace=True) - return adata - - -def pbmc68k_reduced() -> AnnData: - return _pbmc68k_reduced().copy() - - -@cache -def _pbmc3k() -> AnnData: - adata = sc.datasets.pbmc3k() - assert isinstance(adata.X, CSRBase) - adata.layers["counts"] = adata.X.astype(np.int32, copy=True) - sc.pp.log1p(adata) - return adata - - -def pbmc3k() -> AnnData: - return _pbmc3k().copy() - - -@cache -def _bmmc(n_obs: int = 4000) -> AnnData: - registry = pooch.create( - path=pooch.os_cache("pooch"), - base_url="doi:10.6084/m9.figshare.22716739.v1/", - ) - registry.load_registry_from_doi() - samples = {smp: f"{smp}_filtered_feature_bc_matrix.h5" for smp in ("s1d1", "s1d3")} - adatas = {} - - for sample_id, filename in samples.items(): - path = registry.fetch(filename) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"Variable names are not unique") - sample_adata = sc.read_10x_h5(path) - sample_adata.var_names_make_unique() - sc.pp.subsample(sample_adata, n_obs=n_obs // len(samples)) - adatas[sample_id] = sample_adata - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"Observation names are not unique") - adata = concat(adatas, label="sample") - adata.obs_names_make_unique() - - assert isinstance(adata.X, CSRBase) - adata.layers["counts"] = adata.X.astype(np.int32, copy=True) - sc.pp.log1p(adata) - adata.obs["n_counts"] = adata.layers["counts"].sum(axis=1).A1 - return adata - - -def bmmc(n_obs: int = 400) -> AnnData: - return _bmmc(n_obs).copy() - - -@cache -def _lung93k() -> AnnData: - path = pooch.retrieve( - url="https://figshare.com/ndownloader/files/45788454", - known_hash="md5:4f28af5ff226052443e7e0b39f3f9212", - ) - adata = sc.read_h5ad(path) - assert isinstance(adata.X, CSRBase) - adata.layers["counts"] = adata.X.astype(np.int32, copy=True) - sc.pp.log1p(adata) - return adata - - -def lung93k() -> AnnData: - return _lung93k().copy() - - -def to_off_axis(x: np.ndarray | CSRBase) -> np.ndarray | CSCBase: - if isinstance(x, CSRBase): - return x.tocsc() - if isinstance(x, np.ndarray): - assert not np.isfortran(x) - return x.copy(order="F") - msg = f"Unexpected type {type(x)}" - raise TypeError(msg) - - -def _get_dataset_raw(dataset: Dataset) -> tuple[AnnData, str | None]: - match dataset: - case "pbmc68k_reduced": - adata, batch_key = pbmc68k_reduced(), None - case "pbmc3k": - adata, batch_key = pbmc3k(), None # can’t use this with batches - case "bmmc": - # TODO: allow specifying bigger variant - adata, batch_key = bmmc(400), "sample" - case "lung93k": - adata, batch_key = lung93k(), "PatientNumber" - case _: - msg = f"Unknown dataset {dataset}" - raise AssertionError(msg) - - # add off-axis layers - adata.layers["off-axis"] = to_off_axis(adata.X) - adata.layers["counts-off-axis"] = to_off_axis(adata.layers["counts"]) - - # add mitochondrial gene 
and pre-compute qc metrics - adata.var["mt"] = adata.var_names.str.startswith("MT-") - assert adata.var["mt"].sum() > 0, "no MT genes in dataset" - sc.pp.calculate_qc_metrics( - adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True - ) - - return adata, batch_key - - -def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | None]: - adata, batch_key = _get_dataset_raw(dataset) - if layer is not None: - adata.X = adata.layers.pop(layer) - return adata, batch_key - - -def get_count_dataset( - dataset: Dataset, *, layer: KeyCount = "counts" -) -> tuple[AnnData, str | None]: - adata, batch_key = _get_dataset_raw(dataset) - - adata.X = adata.layers.pop(layer) - # remove indicators that X was transformed - adata.uns.pop("log1p", None) - - return adata, batch_key - - -def param_skipper( - param_names: Sequence[str], params: tuple[Sequence[object], ...] -) -> ParamSkipper: - """Create a decorator that will skip all combinations that contain any of the given parameters. - - Examples - -------- - >>> param_names = ["letters", "numbers"] - >>> params = [["a", "b"], [3, 4, 5]] - >>> skip_when = param_skipper(param_names, params) - - >>> @skip_when(letters={"a"}, numbers={3}) - ... def func(a, b): - ... print(a, b) - >>> run_as_asv_benchmark(func) - b 4 - b 5 - - """ - - def skip(**skipped: AbstractSet) -> Callable[[C], C]: - skipped_combs = [ - tuple(record.values()) - for record in ( - dict(zip(param_names, vals, strict=True)) - for vals in itertools.product(*params) - ) - if any(v in skipped.get(n, set()) for n, v in record.items()) - ] - # print(skipped_combs, file=sys.stderr) - return skip_for_params(skipped_combs) - - return skip diff --git a/benchmarks/benchmarks/co_occurence.py b/benchmarks/benchmarks/co_occurence.py index dd62144ff..16263725b 100644 --- a/benchmarks/benchmarks/co_occurence.py +++ b/benchmarks/benchmarks/co_occurence.py @@ -7,10 +7,8 @@ from typing import TYPE_CHECKING -import scanpy as sc import squidpy as sq -from ._utils import pbmc68k_reduced if TYPE_CHECKING: from anndata import AnnData
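The "add the array order and stuff later" note in the last commit message leaves dataset and parameter sweeps for a follow-up. Should that follow-up materialise, ASV's `param_names`/`params` convention (already visible in the recorded benchmarks.json above) would be one way to express it. A minimal sketch, assuming the existing `n_splits` argument of `squidpy.gr.co_occurrence` as an illustrative axis to sweep; this is not part of the patch series:

    from __future__ import annotations

    import squidpy as sq


    class CoOccurrenceSuite:
        """Parametrized co-occurrence benchmarks; ASV runs each value in `params` as a separate case."""

        param_names = ["n_splits"]
        params = [[1, 2, 4]]

        def setup(self, n_splits: int) -> None:
            # Same dataset and cluster key as the module-level benchmarks above.
            self.adata = sq.datasets.imc()

        def time_co_occurrence(self, n_splits: int) -> None:
            sq.gr.co_occurrence(self.adata, cluster_key="cell type", n_splits=n_splits)

        def peakmem_co_occurrence(self, n_splits: int) -> None:
            sq.gr.co_occurrence(self.adata, cluster_key="cell type", n_splits=n_splits)

ASV passes each parameter value to `setup` and to the timed methods, so the class-based form needs no module-level globals.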