From accbb4f8b3a12970c861aeb2b3a65b9ca2805480 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Sat, 27 Jun 2026 00:03:57 +0200 Subject: [PATCH 1/5] init --- src/squidpy/_utils.py | 4 ++++ src/squidpy/gr/_ligrec.py | 31 +++++++++++++++++++---------- src/squidpy/gr/_nhood.py | 29 ++++++++++++++++++--------- src/squidpy/gr/_ppatterns.py | 24 +++++++++++++--------- src/squidpy/gr/_ripley.py | 25 +++++++++++++---------- tests/_data/ligrec_no_numba.pickle | Bin 15619 -> 10534 bytes tests/graph/test_ligrec.py | 9 +++++++++ tests/graph/test_nhood.py | 11 ++++++++++ tests/graph/test_ppatterns.py | 21 ++++++++++++++++--- tests/graph/test_ripley.py | 23 +++++++++++++++++++++ 10 files changed, 136 insertions(+), 41 deletions(-) diff --git a/src/squidpy/_utils.py b/src/squidpy/_utils.py index 97197ddd4..791925f43 100644 --- a/src/squidpy/_utils.py +++ b/src/squidpy/_utils.py @@ -243,6 +243,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return wrapper +def _spawn_seeds(seed: int | None, n: int) -> list[np.random.SeedSequence]: + return np.random.SeedSequence(seed).spawn(n) + + def thread_map( fn: Callable[..., Any], items: Iterable[Any], diff --git a/src/squidpy/gr/_ligrec.py b/src/squidpy/gr/_ligrec.py index 362e501f6..d802c236c 100644 --- a/src/squidpy/gr/_ligrec.py +++ b/src/squidpy/gr/_ligrec.py @@ -20,7 +20,14 @@ from squidpy._constants._constants import ComplexPolicy, CorrAxis from squidpy._constants._pkg_constants import Key from squidpy._docs import d, inject_docs -from squidpy._utils import NDArrayA, Signal, SigQueue, _get_n_cores, parallelize +from squidpy._utils import ( + NDArrayA, + Signal, + SigQueue, + _get_n_cores, + _spawn_seeds, + parallelize, +) from squidpy._validators import assert_positive, check_tuple_needles from squidpy.gr._utils import ( _assert_categorical_obs, @@ -753,6 +760,7 @@ def extractor(res: Sequence[TempResult]) -> TempResult: # (n_cells, n_genes) data = np.array(data[data.columns.difference(["clusters"])].values, dtype=np.float64, order="C") # all 3 should be C contiguous + seeds = _spawn_seeds(seed, n_perms) return parallelize( # type: ignore[no-any-return] _analysis_helper, np.arange(n_perms, dtype=np.int32).tolist(), @@ -767,7 +775,7 @@ def extractor(res: Sequence[TempResult]) -> TempResult: interactions, interaction_clusters=interaction_clusters, clustering=clustering, - seed=seed, + seeds=seeds, numba_parallel=numba_parallel, ) @@ -780,7 +788,7 @@ def _analysis_helper( interactions: NDArrayA, interaction_clusters: NDArrayA, clustering: NDArrayA, - seed: int | None = None, + seeds: Sequence[np.random.SeedSequence], numba_parallel: bool | None = None, queue: SigQueue | None = None, ) -> TempResult: @@ -790,7 +798,7 @@ def _analysis_helper( Parameters ---------- perms - Permutation indices. Only used to set the ``seed``. + Permutation indices. Used to index ``seeds`` and to decide which worker returns the means. data Array of shape `(n_cells, n_genes)`. mean @@ -804,8 +812,8 @@ def _analysis_helper( Array of shape `(n_interaction_clusters, 2)`. clustering Array of shape `(n_cells,)` containing the original clustering. - seed - Random seed for :class:`numpy.random.RandomState`. + seeds + One :class:`numpy.random.SeedSequence` per permutation, indexed by the values in ``perms``. numba_parallel Whether to use :func:`numba.prange` or not. If `None`, it's determined automatically. queue @@ -820,9 +828,8 @@ def _analysis_helper( - `'pvalues'` - array of shape `(n_interactions, n_interaction_clusters)` containing `np.sum(T0 > T)` where `T0` is the test statistic under null hypothesis and `T` is the true test statistic. """ - rs = np.random.RandomState(None if seed is None else perms[0] + seed) - - clustering = clustering.copy() + # used as a read-only base; each permutation shuffles its own copy (see the loop below) + clustering_base = clustering.copy() n_cls = mean.shape[1] return_means = np.min(perms) == 0 @@ -848,7 +855,11 @@ def _analysis_helper( res_means = None test = _test - for _ in perms: + for p in perms: + # shuffle from the same base with a per-permutation seed, so each permutation is + # independent of the others and of how the permutations are split across jobs + rs = np.random.RandomState(np.random.MT19937(seeds[p])) + clustering = clustering_base.copy() rs.shuffle(clustering) error = test(interactions, interaction_clusters, data, clustering, mean, mask, res=res) if error: diff --git a/src/squidpy/gr/_nhood.py b/src/squidpy/gr/_nhood.py index c679f42dd..c7b2e1c4a 100644 --- a/src/squidpy/gr/_nhood.py +++ b/src/squidpy/gr/_nhood.py @@ -20,7 +20,14 @@ from squidpy._constants._constants import Centrality from squidpy._constants._pkg_constants import Key from squidpy._docs import d, inject_docs -from squidpy._utils import NDArrayA, Signal, SigQueue, _get_n_cores, parallelize +from squidpy._utils import ( + NDArrayA, + Signal, + SigQueue, + _get_n_cores, + _spawn_seeds, + parallelize, +) from squidpy._validators import assert_positive from squidpy.gr._utils import ( _assert_categorical_obs, @@ -200,6 +207,7 @@ def nhood_enrichment( n_jobs = _get_n_cores(n_jobs) start = logg.info(f"Calculating neighborhood enrichment using `{n_jobs}` core(s)") + seeds = _spawn_seeds(seed, n_perms) perms = parallelize( _nhood_enrichment_helper, @@ -215,7 +223,7 @@ def nhood_enrichment( int_clust=int_clust, libraries=libraries, n_cls=n_cls, - seed=seed, + seeds=seeds, ) zscore = (count - perms.mean(axis=0)) / perms.std(axis=0) @@ -444,19 +452,22 @@ def _nhood_enrichment_helper( int_clust: NDArrayA, libraries: pd.Series[CategoricalDtype] | None, n_cls: int, - seed: int | None = None, + seeds: Sequence[np.random.SeedSequence], queue: SigQueue | None = None, ) -> NDArrayA: perms = np.empty((len(ixs), n_cls, n_cls), dtype=np.float64) - int_clust = int_clust.copy() # threading - rs = np.random.RandomState(seed=None if seed is None else seed + ixs[0]) + int_clust = int_clust.copy() # threading; used as a read-only base for each permutation - for i in range(len(ixs)): + for i, ix in enumerate(ixs): + # shuffle from the same base with a per-permutation seed, so each permutation is + # independent of the others and of how the permutations are split across jobs + rs = np.random.RandomState(np.random.MT19937(seeds[ix])) if libraries is not None: - int_clust = _shuffle_group(int_clust, libraries, rs) + shuffled = _shuffle_group(int_clust, libraries, rs) else: - rs.shuffle(int_clust) - perms[i, ...] = callback(indices, indptr, int_clust) + shuffled = int_clust.copy() + rs.shuffle(shuffled) + perms[i, ...] = callback(indices, indptr, shuffled) if queue is not None: queue.put(Signal.UPDATE) diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index c77085228..f9f5e44b0 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -10,7 +10,6 @@ import pandas as pd from anndata import AnnData from numba import njit, prange -from numpy.random import default_rng from scanpy import logging as logg from scanpy.metrics import gearys_c, morans_i from scipy import stats @@ -23,7 +22,15 @@ from squidpy._constants._constants import SpatialAutocorr from squidpy._constants._pkg_constants import Key from squidpy._docs import d, inject_docs -from squidpy._utils import NDArrayA, Signal, SigQueue, _get_n_cores, deprecated_params, parallelize +from squidpy._utils import ( + NDArrayA, + Signal, + SigQueue, + _get_n_cores, + _spawn_seeds, + deprecated_params, + parallelize, +) from squidpy._validators import assert_key_in_adata, assert_positive from squidpy.gr._utils import ( _assert_categorical_obs, @@ -204,16 +211,16 @@ def extract_obsm(adata: AnnData, ixs: int | Sequence[int] | None) -> tuple[NDArr if n_perms is not None: assert_positive(n_perms, name="n_perms") perms = list(np.arange(n_perms)) + seeds = _spawn_seeds(seed, n_perms) score_perms = parallelize( _score_helper, collection=perms, extractor=np.concatenate, - use_ixs=True, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, - )(mode=mode, g=g, vals=vals, seed=seed) + )(mode=mode, g=g, vals=vals, seeds=seeds) else: score_perms = None @@ -240,20 +247,19 @@ def extract_obsm(adata: AnnData, ixs: int | Sequence[int] | None) -> tuple[NDArr def _score_helper( - ix: int, perms: Sequence[int], mode: SpatialAutocorr, g: spmatrix, vals: NDArrayA, - seed: int | None = None, + seeds: Sequence[np.random.SeedSequence], queue: SigQueue | None = None, ) -> pd.DataFrame: score_perms = np.empty((len(perms), vals.shape[0])) - rng = default_rng(None if seed is None else ix + seed) func = morans_i if mode == SpatialAutocorr.MORAN else gearys_c - for i in range(len(perms)): - idx_shuffle = rng.permutation(g.shape[0]) + for i, p in enumerate(perms): + rs = np.random.RandomState(np.random.MT19937(seeds[p])) + idx_shuffle = rs.permutation(g.shape[0]) score_perms[i, :] = func(g[idx_shuffle, :], vals) if queue is not None: diff --git a/src/squidpy/gr/_ripley.py b/src/squidpy/gr/_ripley.py index 7c92ae357..ad4f8d354 100644 --- a/src/squidpy/gr/_ripley.py +++ b/src/squidpy/gr/_ripley.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd from anndata import AnnData -from numpy.random import default_rng from scanpy import logging as logg from scipy.spatial import ConvexHull, Delaunay from scipy.spatial.distance import pdist @@ -18,7 +17,7 @@ from squidpy._constants._constants import RipleyStat from squidpy._constants._pkg_constants import Key from squidpy._docs import d, inject_docs -from squidpy._utils import NDArrayA +from squidpy._utils import NDArrayA, _spawn_seeds from squidpy.gr._utils import _assert_categorical_obs, _assert_spatial_basis, _save_data, extract_adata_if_sdata __all__ = ["ripley"] @@ -133,11 +132,12 @@ def ripley( start = logg.info( f"Calculating Ripley's {mode} statistic for `{le.classes_.shape[0]}` clusters and `{n_simulations}` simulations" ) + obs_seed, *sim_seeds = _spawn_seeds(seed, n_simulations + 1) for i in np.arange(np.max(cluster_idx) + 1): coord_c = coordinates[cluster_idx == i, :] if mode == RipleyStat.F: - random = _ppp(hull, n_simulations=1, n_observations=n_observations, seed=seed) + random = _ppp(hull, n_simulations=1, n_observations=n_observations, seed_sequence=obs_seed) tree_c = NearestNeighbors(metric=metric, n_neighbors=n_neigh).fit(coord_c) distances, _ = tree_c.kneighbors(random, n_neighbors=n_neigh) bins, obs_stats = _f_g_function(distances.squeeze(), support) @@ -156,7 +156,7 @@ def ripley( pvalues = np.ones((le.classes_.shape[0], len(bins))) for i in range(n_simulations): - random_i = _ppp(hull, n_simulations=1, n_observations=n_observations, seed=seed) + random_i = _ppp(hull, n_simulations=1, n_observations=n_observations, seed_sequence=sim_seeds[i]) if mode == RipleyStat.F: tree_i = NearestNeighbors(metric=metric, n_neighbors=n_neigh).fit(random_i) distances_i, _ = tree_i.kneighbors(random, n_neighbors=1) @@ -216,7 +216,12 @@ def _l_function(distances: NDArrayA, support: NDArrayA, n: int, area: float) -> return support, l_estimate -def _ppp(hull: ConvexHull, n_simulations: int, n_observations: int, seed: int | None = None) -> NDArrayA: +def _ppp( + hull: ConvexHull, + n_simulations: int, + n_observations: int, + seed_sequence: np.random.SeedSequence, +) -> NDArrayA: """ Simulate Poisson Point Process on a polygon. @@ -228,14 +233,14 @@ def _ppp(hull: ConvexHull, n_simulations: int, n_observations: int, seed: int | Number of simulated point processes. n_observations Number of observations to sample from each simulation. - seed - Random seed. + seed_sequence + Seed sequence used to seed the legacy :class:`numpy.random.RandomState`. Returns ------- An Array with shape ``(n_simulation, n_observations, 2)``. """ - rng = default_rng(None if seed is None else seed) + rs = np.random.RandomState(np.random.MT19937(seed_sequence)) vxs = hull.points[hull.vertices] deln = Delaunay(vxs) @@ -246,8 +251,8 @@ def _ppp(hull: ConvexHull, n_simulations: int, n_observations: int, seed: int | i_obs = 0 while i_obs < n_observations: x, y = ( - rng.uniform(bbox[0], bbox[2]), - rng.uniform(bbox[1], bbox[3]), + rs.uniform(bbox[0], bbox[2]), + rs.uniform(bbox[1], bbox[3]), ) if deln.find_simplex((x, y)) >= 0: result[i_sim, i_obs] = (x, y) diff --git a/tests/_data/ligrec_no_numba.pickle b/tests/_data/ligrec_no_numba.pickle index e83cc26ae43899f0e562a0466b17fa8c402fdf83..138fc8545d8d9d2e3c37cb890d761357114126a1 100644 GIT binary patch literal 10534 zcmdU#du$X%9LMk7>*GqFv_%w2DB+PDy;2kmaVy0KjGSo25D6}O?b5yFdRKSv)T;Q1 z7HG0DQHCJm9}vVCf-y$?$HZtfihvTMe<&cL2nnEIowDK+?-Y%%YwuB-ES1LlMh_%A`LQ=SkeqN8KK95)9DVm4x z>`<_Flev2|o)ne15L1JlLR{z&6%}hG^J0m%Rg3g{ydFl5O%LH?(TEz%jKSI%J{9kZ zwylbZe1r^w-r5B7{DPtg-D*(n5)@So(mSjbx6<1?$agRzEib_9z4&OnU3>y-m064G z-(k(aAf8-6JFLg3P8%Cfb#`?Jd1`xSDwd3<*C^o?u`Ll-lS-;BNo09xPplPyuhI)6 zM-OgsM(|qv0G@)JsIUi1wVJm*+1(|QA+Bf>?p>#U>(-oeaX1`acoi;MAx9ENdRr#> zAPNOg{7{Oalt2kUDT7iDr2-0uu4`Zwt<*5`F(H-`RV>v=C&+&k|ScWm}c+qR+InfufAyLKO4c!^WS?|%2K$-ia3H|{6z z+PHYekNY_yPV@1)*;<`BeDrT>P;WAUo1b=&9ysY1sZlHDS4Lwoo?3;GBNa(SG=C%& zg@D8%rMzk^+D3ee9N=dsu@$*UEfq_p>1oQPDpWVvA3ku-q~4ZV!^MVs2f9|8o_lO# zZ%f0ng{!76{ptv}=3MQ5fBpO1dz-#IGwa54+>V`Z9$j(i1ow~o($(<3pE=)!>Z-E( zPpC)0tU3J#N^^CiBo6eu?6+Xo#;LzU8@(1yuyJzne;=>IWUpbp!odVy5UuI?JkujkB=5yorPOmufD;Jycj!=5r;Y^FaTl~nU?`+TS zYac4GPw9ti&h9uAy}&&gINr0!e~N?qhj+ewn(Gc6ukO(fG{cYZWf_pYZhG3&N1@Q$U`G9newq1mDr~g9tO?9V^=t6mv%pM-rWAQE_5P) zR+ONCHKoJZIYi+HbDYi4zWPz%a(+btRBDT&kEhjKCBFJT_3p*|ib~NKYl@8LwfUJd zV$bU_sLYx=a|FqYaz16lI{V5{`N%5*;p{n8jw-Ava_Y|q4|zW1^AG6<&j#|_W5sf( zrF6epiL6t*IMkHnTrBtI^nzR|NUPejbfQ zHP#djBb$fLqZ(8@;wrXOZg=F*=2}!|R*}TZftS>pe?wE8A*#WMpaPgF;^X4uyg@Ls ze{CB*DaU}WiwMs!x)Orpi;0hkF_o+#FLgN_@vTJ!p?*9f%Vca0G*bsz?Ql5B z0O+C>G2fh>VjsN8T^G`VIO zOhsI?99m*OjTpPXw7|?zsG(uztUE(Z4=g}f3pCw2v$3hEVSa;t?`>&ayf}n4PwRuB z*}zy@m)v_-h{lO}Q}d~blmgVnB+1UzlY-I#p{1*f^0T@Vat73JI*k+!5)B-U8C$rV zZima0tD@nOUxvlQ@+;{$yaBEJ9#nd;L<;6SJU}hrreymV43+N`lLFmO%fY=uG6{5a zH3n3+GA4A;k2OLpmUvv{+f!WtxP+w0$Ak#kq3hICL~^D1kyr9h85(pE`Us%Q5>Lzp z4ahw3N^lSL;eSx&dZoDg*-QG&1oQzD8x^XjE9m21+eEID3?AnIV!|Ym)AEahmBkxnBy#l*b4rx zWq|Q_7Qwdizif{V-h?cIZNY$g3hc?87A(SSp@Dj6`)d|qqXGqu^L1d`FFpGciy^kM zK_gcU1G44n#9$x=82HGx6%k%bs3Dxi!*-EELlbRTgxLZNbq`wG^|o=tESH&XSiENo zJ{*qs?4pP!f3fXcJ76Mn^4q}_nZ?+K7PDezx?xFpTL_~`6SMbh0gc1iiFWWtW-r-+ z9ht@CqK`Ufx=#4S3hS&wE!KJ2fD!f62=KjZV3eVHpAlw6(dQQgA?+d6UQ+EN)t6AM z#`1o>`5Kym5NIkR1%&?xGpK1c8f6TvyqV}BL{%oN>l*w|-jV4=VA%hI)5+|fUyFnP E0^Z=8RsaA1 literal 15619 zcmds;du$X{6o>a&TA-BD7Ft2@5eWDwU0MYVaVtd=8K@ORiKZ;mcIfV~-8TDBt0EX} zg=Exdhu|v)LNJD42oe9#glIGoMI}Z}#3#N;1O-t9D;n|KnS0pX*}cr&?(Xb{A-!j3 z=G;5?JLf)T`zz~Rn}+Axv_q-v6GL5Mq`tE^EY+_Li#?L6lr0pa;?3IUA$8Wn>RNTC z;_~%$hgD^2qI)0|mBJx07^&|OLt?iSj;PAm1;O6VHB0n!bqH!sq*Z*uKu4thTpd-Z z@Wn!XfzCBS$=5+Dfwxi%HlT1)~8m92WbbSzlP{>`?8tcXqWxD+IP{A*ICGac?)q<- z`N#X}=WQ$B*|nYNhwU0I&3lP3s(9G<4*s#@tH3Gm{pE)TmXsc4GJ)S+JD)n{?JqxE zGoXCI^fNN!Ib|Oulol2H(M$h6W6H9!rN3v&3iGuSS&f#mmkMP?#eT)X7yja5f8~>Z zPMG=`nem*mmkFZ_&yM>)?(evNV_(tY`vLC%S?=$ng)xO^M>d@AIG^#l(c*fL<$NC_ zj4d8^U~I1cN!#C7^gndo`Hc6!+u&ip`x;}0=XA`E70Qc?{f+DF&Dh8D*k|4>H(wK3 z^(lL~Fs`WB-@0PbS)MiQU~Knk9>28slzp5~QFwNI-^cg)Y~R<@0w-Lg>=nZJ!n0$y zV2#uE_iXoXY>#Cr`*@+Uc-Wz@&2+wJHJ@3`c%8CW3KNQoeO0^N)r@`2m!<3zgsP%q zUq5e|!z%VFp}O$wu-0kc@A3P4w%^aq#tY4RwNO)dcI+#xb=vzptaVwvuVZ^GOWA9L ziN(VXeZ~DftNokBjMpjqL}5}0r}#K)wp@koM&U_#k^oF^3e5q-cVIJVd@0 zJ}71Qh&lQrB*op?+a+o2sg2j^bOBFXGm@`{X3~j9by~*l-K$Cax>i+6e5-?E zxAt+87!3B_8}W6;`asSTqmnNuc98kIArkBGJ9N3$@Aj9@WXpOmXA_lB|CDq?n(Cm# z>-5Ep*RqyAWBpwF8QYJa(?yk6^pszOJe|`$Iuj`8=MW`5|; z@=Lu=@p*O6Z1Eel<441Yw)p~h=Ufd8!g^PYrM}sr}(@w zs8#qDFMYg9s8#s-;C|m^V;o+`>&Kb|H2b__XtCkrtY_hwU8nfGdT74!anKuWdW>ZF z_!)by8{47yypm|X@v(b+_=p(?$NIdM;`6GaR^umwA+FITd)aKp`|NXy&nt{tg`e4y zhWKnu)@Y#kyxOR>`1mJ1W>{I{^U9;v;3JOFnuQ_r`!xH!3aK^t@%=orNibr3UXj#l z{N(&+j;GgW*Q43z)kzD3&+fh%p4okh&nuN$g`e-NV^}CYuUcvielmRgEcc^;;`0io z*5JpNzBoGhls9}{&D2_a-1jjEdBf+GO|8O5VBO{(8{4IWpI14(K=IRwr@9a}GIh+T zaHi1!QR2)*g3M6m-0B#)%6VQTjNs)>;(VR-+Dzg!Xl2`>j=T$s90;2jwZaISxqEsB z!X^#*2!u_nnTfFZ8A;ZU=aQufoATozpe4$h@{@3(U6r2#vsRUdbh8PJQL~)IDO!{N zs%ZGb1NO8;`M)(ml^)qrAeT7xSy-6yl)I<%k0<`+VFV+tk%Y6QE}e?5T_YQsWLO?9 Sf;#dnC=#Uf3`g8!YW@Gvn4G%+ diff --git a/tests/graph/test_ligrec.py b/tests/graph/test_ligrec.py index fb0b3f80b..cbe6b1b14 100644 --- a/tests/graph/test_ligrec.py +++ b/tests/graph/test_ligrec.py @@ -306,6 +306,15 @@ def test_reproducibility_cores(self, adata: AnnData, interactions: Interactions_ assert not np.allclose(r3["pvalues"], r1["pvalues"]) assert not np.allclose(r3["pvalues"], r2["pvalues"]) + def test_n_jobs_invariance(self, adata: AnnData, interactions: Interactions_t): + """The number of workers must not change the result (one seed is spawned per permutation).""" + kw = {"interactions": interactions, "n_perms": 25, "copy": True, "show_progress_bar": False, "seed": 42} + res_serial = ligrec(adata, _CK, n_jobs=1, **kw) + res_parallel = ligrec(adata, _CK, n_jobs=2, **kw) + + np.testing.assert_allclose(res_serial["means"], res_parallel["means"]) + np.testing.assert_allclose(res_serial["pvalues"], res_parallel["pvalues"]) + def test_reproducibility_numba_parallel_off(self, adata: AnnData, interactions: Interactions_t): t1 = time() r1 = ligrec( diff --git a/tests/graph/test_nhood.py b/tests/graph/test_nhood.py index 74764f236..814305404 100644 --- a/tests/graph/test_nhood.py +++ b/tests/graph/test_nhood.py @@ -58,6 +58,17 @@ def test_reproducibility(self, adata: AnnData, n_jobs: int): np.testing.assert_array_equal(res3.zscore, res2.zscore) np.testing.assert_array_equal(res3.counts, res2.counts) + def test_n_jobs_invariance(self, adata: AnnData): + """The number of workers must not change the result (one seed is spawned per permutation).""" + spatial_neighbors(adata) + + kw = {"cluster_key": _CK, "seed": 42, "n_perms": 20, "copy": True} + res_serial = nhood_enrichment(adata, n_jobs=1, **kw) + res_parallel = nhood_enrichment(adata, n_jobs=2, **kw) + + np.testing.assert_array_equal(res_serial.zscore, res_parallel.zscore) + np.testing.assert_array_equal(res_serial.counts, res_parallel.counts) + def test_centrality_scores(nhood_data: AnnData): adata = nhood_data diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 01c2e3033..73c553bb2 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -46,9 +46,11 @@ def test_spatial_autocorr_seq_par(dummy_adata: AnnData, mode: str): assert not np.array_equal(idx_df, idx_adata) np.testing.assert_array_equal(sorted(idx_df), sorted(idx_adata)) # check parallel gives same results - with pytest.raises(AssertionError, match=r'.*\(column name="pval_z_sim"\) are different.*'): - # because the seeds will be different, we don't expect the pval_sim values to be the same - assert_frame_equal(df, df_parallel) + # each permutation now gets its own seed (spawned from a SeedSequence), so the + # simulated p-values no longer depend on how the permutations are split across jobs + np.testing.assert_allclose(df["pval_sim"].values, df_parallel["pval_sim"].values, atol=1e-12) + np.testing.assert_allclose(df["pval_z_sim"].values, df_parallel["pval_z_sim"].values, atol=1e-12) + np.testing.assert_allclose(df["var_sim"].values, df_parallel["var_sim"].values, atol=1e-12) @pytest.mark.parametrize("mode", ["moran", "geary"]) @@ -90,6 +92,19 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod assert_frame_equal(df_1, df_2) +@pytest.mark.parametrize("mode", ["moran", "geary"]) +def test_spatial_autocorr_n_jobs_invariance(dummy_adata: AnnData, mode: str): + """The number of workers must not change the permutation-based results (seed spawned per permutation).""" + kwargs = {"mode": mode, "copy": True, "seed": 42, "n_perms": 50} + df_serial = spatial_autocorr(dummy_adata, n_jobs=1, **kwargs) + df_parallel = spatial_autocorr(dummy_adata, n_jobs=2, **kwargs) + + # align on the gene index in case the stat-based sort order ties differently + df_parallel = df_parallel.loc[df_serial.index] + for col in ["pval_sim", "pval_z_sim", "var_sim"]: + np.testing.assert_allclose(df_serial[col].values, df_parallel[col].values, atol=1e-12) + + @pytest.mark.parametrize( "attr,layer,genes", [ diff --git a/tests/graph/test_ripley.py b/tests/graph/test_ripley.py index 750acbff2..afa395007 100644 --- a/tests/graph/test_ripley.py +++ b/tests/graph/test_ripley.py @@ -91,3 +91,26 @@ def test_ripley_results( # assert n_zeros == n_clusters idx = np.nonzero(obs_df.bins.values)[0] assert idx.shape[0] == n_steps * n_clusters - n_clusters + + +@pytest.mark.parametrize("mode", [RipleyStat.F, RipleyStat.G, RipleyStat.L]) +def test_ripley_seed(adata_ripley: AnnData, mode: RipleyStat): + """Same seed reproduces simulations, different seeds change them, and simulations are not all identical.""" + adata = adata_ripley + kw = dict(cluster_key=CLUSTER_KEY, mode=mode.s, n_simulations=20, copy=True) + + res1 = ripley(adata, seed=42, **kw) + res2 = ripley(adata, seed=42, **kw) + res3 = ripley(adata, seed=43, **kw) + + sims1 = res1["sims_stat"].pivot(index="bins", columns="simulations", values="stats").to_numpy() + sims2 = res2["sims_stat"].pivot(index="bins", columns="simulations", values="stats").to_numpy() + sims3 = res3["sims_stat"].pivot(index="bins", columns="simulations", values="stats").to_numpy() + + # same seed -> identical simulations + np.testing.assert_array_equal(sims1, sims2) + # different seed -> different simulations + assert not np.array_equal(sims1, sims3) + # regression: simulations must not all be identical to each other + # (previously every simulation reused the same seed and produced the same point pattern) + assert not np.allclose(sims1, sims1[:, [0]]) From e58b2913cbd7a24115d661148a2b643392158458 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Sat, 27 Jun 2026 00:06:07 +0200 Subject: [PATCH 2/5] unify kw --- tests/graph/test_ppatterns.py | 6 +++--- tests/graph/test_ripley.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 73c553bb2..36425eeaf 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -95,9 +95,9 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod @pytest.mark.parametrize("mode", ["moran", "geary"]) def test_spatial_autocorr_n_jobs_invariance(dummy_adata: AnnData, mode: str): """The number of workers must not change the permutation-based results (seed spawned per permutation).""" - kwargs = {"mode": mode, "copy": True, "seed": 42, "n_perms": 50} - df_serial = spatial_autocorr(dummy_adata, n_jobs=1, **kwargs) - df_parallel = spatial_autocorr(dummy_adata, n_jobs=2, **kwargs) + kw = {"mode": mode, "copy": True, "seed": 42, "n_perms": 50} + df_serial = spatial_autocorr(dummy_adata, n_jobs=1, **kw) + df_parallel = spatial_autocorr(dummy_adata, n_jobs=2, **kw) # align on the gene index in case the stat-based sort order ties differently df_parallel = df_parallel.loc[df_serial.index] diff --git a/tests/graph/test_ripley.py b/tests/graph/test_ripley.py index afa395007..a87e01b23 100644 --- a/tests/graph/test_ripley.py +++ b/tests/graph/test_ripley.py @@ -97,7 +97,7 @@ def test_ripley_results( def test_ripley_seed(adata_ripley: AnnData, mode: RipleyStat): """Same seed reproduces simulations, different seeds change them, and simulations are not all identical.""" adata = adata_ripley - kw = dict(cluster_key=CLUSTER_KEY, mode=mode.s, n_simulations=20, copy=True) + kw = {"cluster_key": CLUSTER_KEY, "mode": mode.s, "n_simulations": 20, "copy": True} res1 = ripley(adata, seed=42, **kw) res2 = ripley(adata, seed=42, **kw) From 20d695c0d4d9ab48c404e05012e47d1c3450067e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Jun 2026 22:33:43 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/graph/test_ppatterns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py index 92de91f16..8cc10ca12 100644 --- a/tests/graph/test_ppatterns.py +++ b/tests/graph/test_ppatterns.py @@ -104,7 +104,7 @@ def test_spatial_autocorr_n_jobs_invariance(dummy_adata: AnnData, mode: str): for col in ["pval_sim", "pval_z_sim", "var_sim"]: np.testing.assert_allclose(df_serial[col].values, df_parallel[col].values, atol=1e-12) - + def test_spatial_autocorr_var_norm_formula(dummy_adata: AnnData, mode: str): """Analytic ``var_norm`` must use the variance matching the chosen statistic. From b6e0d18e9f118697e82c53a3aafc88d6c1de2fab Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Sat, 27 Jun 2026 00:33:22 +0200 Subject: [PATCH 4/5] make it numba compat --- src/squidpy/_utils.py | 7 +++++-- src/squidpy/gr/_ligrec.py | 6 +++--- src/squidpy/gr/_nhood.py | 4 ++-- src/squidpy/gr/_ppatterns.py | 4 ++-- src/squidpy/gr/_ripley.py | 12 ++++++------ 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/squidpy/_utils.py b/src/squidpy/_utils.py index 5860d686f..977784965 100644 --- a/src/squidpy/_utils.py +++ b/src/squidpy/_utils.py @@ -243,8 +243,11 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return wrapper -def _spawn_seeds(seed: int | None, n: int) -> list[np.random.SeedSequence]: - return np.random.SeedSequence(seed).spawn(n) +def _spawn_seeds(seed: int | None, n: int) -> list[int]: + # uint32 integer seeds, derived from independent SeedSequence children (avoids the correlation + # of sequential seeds). uint32 is required to stay reproducible with numba/legacy RandomState, + # which only accept uint32 integer seeds. + return [int(s.generate_state(1)[0]) for s in np.random.SeedSequence(seed).spawn(n)] def thread_map( diff --git a/src/squidpy/gr/_ligrec.py b/src/squidpy/gr/_ligrec.py index ad30b3e88..07d161c4f 100644 --- a/src/squidpy/gr/_ligrec.py +++ b/src/squidpy/gr/_ligrec.py @@ -788,7 +788,7 @@ def _analysis_helper( interactions: NDArrayA, interaction_clusters: NDArrayA, clustering: NDArrayA, - seeds: Sequence[np.random.SeedSequence], + seeds: Sequence[int], numba_parallel: bool | None = None, queue: SigQueue | None = None, ) -> TempResult: @@ -813,7 +813,7 @@ def _analysis_helper( clustering Array of shape `(n_cells,)` containing the original clustering. seeds - One :class:`numpy.random.SeedSequence` per permutation, indexed by the values in ``perms``. + One ``uint32`` integer seed per permutation, indexed by the values in ``perms``. numba_parallel Whether to use :func:`numba.prange` or not. If `None`, it's determined automatically. queue @@ -858,7 +858,7 @@ def _analysis_helper( for p in perms: # shuffle from the same base with a per-permutation seed, so each permutation is # independent of the others and of how the permutations are split across jobs - rs = np.random.RandomState(np.random.MT19937(seeds[p])) + rs = np.random.RandomState(seeds[p]) clustering = clustering_base.copy() rs.shuffle(clustering) error = test(interactions, interaction_clusters, data, clustering, mean, mask, res=res) diff --git a/src/squidpy/gr/_nhood.py b/src/squidpy/gr/_nhood.py index c7b2e1c4a..3f1c143c3 100644 --- a/src/squidpy/gr/_nhood.py +++ b/src/squidpy/gr/_nhood.py @@ -452,7 +452,7 @@ def _nhood_enrichment_helper( int_clust: NDArrayA, libraries: pd.Series[CategoricalDtype] | None, n_cls: int, - seeds: Sequence[np.random.SeedSequence], + seeds: Sequence[int], queue: SigQueue | None = None, ) -> NDArrayA: perms = np.empty((len(ixs), n_cls, n_cls), dtype=np.float64) @@ -461,7 +461,7 @@ def _nhood_enrichment_helper( for i, ix in enumerate(ixs): # shuffle from the same base with a per-permutation seed, so each permutation is # independent of the others and of how the permutations are split across jobs - rs = np.random.RandomState(np.random.MT19937(seeds[ix])) + rs = np.random.RandomState(seeds[ix]) if libraries is not None: shuffled = _shuffle_group(int_clust, libraries, rs) else: diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index 5829382ae..d65ad1859 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -258,14 +258,14 @@ def _score_helper( mode: SpatialAutocorr, g: spmatrix, vals: NDArrayA, - seeds: Sequence[np.random.SeedSequence], + seeds: Sequence[int], queue: SigQueue | None = None, ) -> pd.DataFrame: score_perms = np.empty((len(perms), vals.shape[0])) func = morans_i if mode == SpatialAutocorr.MORAN else gearys_c for i, p in enumerate(perms): - rs = np.random.RandomState(np.random.MT19937(seeds[p])) + rs = np.random.RandomState(seeds[p]) idx_shuffle = rs.permutation(g.shape[0]) score_perms[i, :] = func(g[idx_shuffle, :], vals) diff --git a/src/squidpy/gr/_ripley.py b/src/squidpy/gr/_ripley.py index ad4f8d354..43f1665fc 100644 --- a/src/squidpy/gr/_ripley.py +++ b/src/squidpy/gr/_ripley.py @@ -137,7 +137,7 @@ def ripley( for i in np.arange(np.max(cluster_idx) + 1): coord_c = coordinates[cluster_idx == i, :] if mode == RipleyStat.F: - random = _ppp(hull, n_simulations=1, n_observations=n_observations, seed_sequence=obs_seed) + random = _ppp(hull, n_simulations=1, n_observations=n_observations, seed=obs_seed) tree_c = NearestNeighbors(metric=metric, n_neighbors=n_neigh).fit(coord_c) distances, _ = tree_c.kneighbors(random, n_neighbors=n_neigh) bins, obs_stats = _f_g_function(distances.squeeze(), support) @@ -156,7 +156,7 @@ def ripley( pvalues = np.ones((le.classes_.shape[0], len(bins))) for i in range(n_simulations): - random_i = _ppp(hull, n_simulations=1, n_observations=n_observations, seed_sequence=sim_seeds[i]) + random_i = _ppp(hull, n_simulations=1, n_observations=n_observations, seed=sim_seeds[i]) if mode == RipleyStat.F: tree_i = NearestNeighbors(metric=metric, n_neighbors=n_neigh).fit(random_i) distances_i, _ = tree_i.kneighbors(random, n_neighbors=1) @@ -220,7 +220,7 @@ def _ppp( hull: ConvexHull, n_simulations: int, n_observations: int, - seed_sequence: np.random.SeedSequence, + seed: int, ) -> NDArrayA: """ Simulate Poisson Point Process on a polygon. @@ -233,14 +233,14 @@ def _ppp( Number of simulated point processes. n_observations Number of observations to sample from each simulation. - seed_sequence - Seed sequence used to seed the legacy :class:`numpy.random.RandomState`. + seed + ``uint32`` integer seed for the legacy :class:`numpy.random.RandomState`. Returns ------- An Array with shape ``(n_simulation, n_observations, 2)``. """ - rs = np.random.RandomState(np.random.MT19937(seed_sequence)) + rs = np.random.RandomState(seed) vxs = hull.points[hull.vertices] deln = Delaunay(vxs) From dbc04f74a5a22591e177e81cff9824bb81c73a3a Mon Sep 17 00:00:00 2001 From: "selman.ozleyen" Date: Mon, 29 Jun 2026 14:53:23 +0200 Subject: [PATCH 5/5] docs: warn about per-permutation seeding behaviour change The seeds for permutation/simulation tests are now spawned per permutation from a numpy.random.SeedSequence. This makes results independent of `n_jobs`/`backend`, but changes the results obtained with a given `seed` relative to earlier squidpy versions. Document this with a `.. versionchanged:: 1.8.4` note on the affected public functions (`ligrec`, `nhood_enrichment`, `spatial_autocorr`, `ripley`). The shared note for the permutation-based functions lives in a single docrep template (`seed_versionchanged`) to avoid duplication; `ripley` keeps a tailored note as it concerns simulations. Also add a release-notes entry. Co-Authored-By: Claude Opus 4.8 --- src/squidpy/_docs.py | 10 ++++++++++ src/squidpy/gr/_ligrec.py | 2 ++ src/squidpy/gr/_nhood.py | 2 ++ src/squidpy/gr/_ppatterns.py | 2 ++ src/squidpy/gr/_ripley.py | 9 +++++++++ 5 files changed, 25 insertions(+) diff --git a/src/squidpy/_docs.py b/src/squidpy/_docs.py index 3915f658c..3c9020ed3 100644 --- a/src/squidpy/_docs.py +++ b/src/squidpy/_docs.py @@ -49,6 +49,15 @@ def decorator2(obj: Any) -> Any: _seed = """\ seed Random seed for reproducibility.""" +_seed_versionchanged = """\ +.. versionchanged:: 1.8.4 + Every permutation now draws from an independent seed spawned from a + :class:`numpy.random.SeedSequence`, following `NumPy's guidance on parallel random + number generation `_. + Consequently the permutation-based results no longer depend on ``n_jobs`` / ``backend``, but + results obtained with a given ``seed`` differ from those produced by squidpy < 1.8.4. + See `#1232 `_ and + `#1233 `_.""" _n_perms = """\ n_perms Number of permutations for the permutation test.""" @@ -413,6 +422,7 @@ def decorator2(obj: Any) -> Any: copy_cont=_copy_cont, numba_parallel=_numba_parallel, seed=_seed, + seed_versionchanged=_seed_versionchanged, n_perms=_n_perms, img_layer=_img_layer, feature_name=_feature_name, diff --git a/src/squidpy/gr/_ligrec.py b/src/squidpy/gr/_ligrec.py index 07d161c4f..81d2ddd3b 100644 --- a/src/squidpy/gr/_ligrec.py +++ b/src/squidpy/gr/_ligrec.py @@ -339,6 +339,8 @@ def test( """ Perform the permutation test as described in :cite:`cellphonedb`. + %(seed_versionchanged)s + Parameters ---------- %(cluster_key)s diff --git a/src/squidpy/gr/_nhood.py b/src/squidpy/gr/_nhood.py index 3f1c143c3..17aa5d2a1 100644 --- a/src/squidpy/gr/_nhood.py +++ b/src/squidpy/gr/_nhood.py @@ -160,6 +160,8 @@ def nhood_enrichment( """ Compute neighborhood enrichment by permutation test. + %(seed_versionchanged)s + Parameters ---------- %(adata)s diff --git a/src/squidpy/gr/_ppatterns.py b/src/squidpy/gr/_ppatterns.py index d65ad1859..8e17845e8 100644 --- a/src/squidpy/gr/_ppatterns.py +++ b/src/squidpy/gr/_ppatterns.py @@ -85,6 +85,8 @@ def spatial_autocorr( (``'pval_sim'``, ``'pval_z_sim'``) are unaffected. See `#1183 `_. + %(seed_versionchanged)s + Parameters ---------- %(adata)s diff --git a/src/squidpy/gr/_ripley.py b/src/squidpy/gr/_ripley.py index 43f1665fc..44d00c711 100644 --- a/src/squidpy/gr/_ripley.py +++ b/src/squidpy/gr/_ripley.py @@ -47,6 +47,15 @@ def ripley( According to the `'mode'` argument, it calculates one of the following Ripley's statistics: `{rp.F.s!r}`, `{rp.G.s!r}` or `{rp.L.s!r}` statistics. + .. versionchanged:: 1.8.4 + Every simulation now draws from an independent seed spawned from a + :class:`numpy.random.SeedSequence`, following `NumPy's guidance on parallel random + number generation `_. + Previously every simulation reused the same ``seed`` and therefore produced an identical + point pattern; the simulated statistics (and hence the p-values) are now genuinely + independent across simulations, but results obtained with a given ``seed`` differ from + those produced by squidpy < 1.8.4. + `{rp.F.s!r}`, `{rp.G.s!r}` are defined as: .. math::