Skip to content

Commit 6b22de7

Browse files
grstflying-sheepilan-gold
authored
Add warning for rank_genes_groups (#3792)
Co-authored-by: Phil Schaf <[email protected]> Co-authored-by: Ilan Gold <[email protected]>
1 parent 7717228 commit 6b22de7

File tree

6 files changed

+38
-6
lines changed

6 files changed

+38
-6
lines changed

docs/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
cycler=("https://matplotlib.org/cycler/", None),
137137
dask=("https://docs.dask.org/en/stable/", None),
138138
dask_ml=("https://ml.dask.org/", None),
139+
decoupler=("https://decoupler.readthedocs.io/en/stable/", None),
139140
fast_array_utils=(
140141
"https://icb-fast-array-utils.readthedocs-hosted.com/en/stable/",
141142
None,
@@ -150,6 +151,7 @@
150151
networkx=("https://networkx.org/documentation/stable/", None),
151152
numpy=("https://numpy.org/doc/stable/", None),
152153
pandas=("https://pandas.pydata.org/pandas-docs/stable/", None),
154+
pydeseq2=("https://pydeseq2.readthedocs.io/en/stable/", None),
153155
pynndescent=("https://pynndescent.readthedocs.io/en/latest/", None),
154156
pytest=("https://docs.pytest.org/en/latest/", None),
155157
python=(

docs/references.bib

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,21 @@ @article{Soneson2018
829829
pages = {255--261},
830830
}
831831

832+
@article{Squair2021,
833+
author = {Squair, Jordan W. and Gautier, Matthieu and Kathe, Claudia and Anderson, Mark A. and James, Nicholas D. and Hutson, Thomas H. and Hudelle, Rémi and Qaiser, Taha and Matson, Kaya J. E. and Barraud, Quentin and Levine, Ariel J. and La Manno, Gioele and Skinnider, Michael A. and Courtine, Grégoire},
834+
title = {Confronting false discoveries in single-cell differential expression},
835+
volume = {12},
836+
issn = {2041-1723},
837+
url = {https://doi.org/10.1038/s41467-021-25960-2},
838+
doi = {10.1038/s41467-021-25960-2},
839+
number = {1},
840+
journal = {Nature Communications},
841+
publisher = {Springer Science and Business Media LLC},
842+
year = {2021},
843+
month = {sep},
844+
pages = {5692},
845+
}
846+
832847
@article{Stuart2019,
833848
author = {Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck, William M. and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul},
834849
title = {Comprehensive Integration of Single-Cell Data},

docs/release-notes/3700.feat.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Make {func}`scanpy.get.aggregate` `dask` compatible with all aggregations except median. {smaller}`I Gold`
1+
Make {func}`scanpy.get.aggregate` {doc}`dask:index` compatible with all aggregations except median. {smaller}`I Gold`

docs/release-notes/3792.docs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a warning message to {func}`~scanpy.tl.rank_genes_groups` that it is not appropriate for most use-cases.

src/scanpy/tools/_rank_genes_groups.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,15 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915
526526
527527
Expects logarithmized data.
528528
529+
.. warning::
530+
531+
Comparing between cells leads to artificially small p-values (i.e. highly inflated significance),
532+
since cells are not independent observations :cite:p:`Squair2021`.
533+
Especially in single-cell data, consider instead using more appropriate methods, such as combining pseudobulking with :doc:`pydeseq2:index`.
534+
535+
:func:`decoupler.pp.pseudobulk` or :func:`scanpy.get.aggregate` can be used to aggregate samples for pseudobulking.
536+
Ours is a bit more verbose, but supports :doc:`dask:index` arrays for improved performance.
537+
529538
Parameters
530539
----------
531540
adata

tests/test_pca.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ def test_pca_reproducible(array_type):
341341
assert not np.array_equal(a.obsm["X_pca"], c.obsm["X_pca"])
342342

343343

344-
def test_pca_chunked():
344+
def test_pca_chunked() -> None:
345345
"""Tests that chunked PCA is equivalent to default PCA.
346346
347347
See also <https://github.com/scverse/scanpy/issues/1590>
@@ -354,17 +354,22 @@ def test_pca_chunked():
354354
default = sc.pp.pca(pbmc_full, copy=True)
355355

356356
# Taking absolute value since sometimes dimensions are flipped
357+
rtol = 1e-6
357358
np.testing.assert_allclose(
358-
np.abs(chunked.obsm["X_pca"]), np.abs(default.obsm["X_pca"])
359+
np.abs(chunked.obsm["X_pca"]), np.abs(default.obsm["X_pca"]), rtol=rtol
359360
)
360-
np.testing.assert_allclose(np.abs(chunked.varm["PCs"]), np.abs(default.varm["PCs"]))
361361
np.testing.assert_allclose(
362-
np.abs(chunked.uns["pca"]["variance"]), np.abs(default.uns["pca"]["variance"])
362+
np.abs(chunked.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=rtol
363+
)
364+
np.testing.assert_allclose(
365+
np.abs(chunked.uns["pca"]["variance"]),
366+
np.abs(default.uns["pca"]["variance"]),
367+
rtol=rtol,
363368
)
364369
np.testing.assert_allclose(
365370
np.abs(chunked.uns["pca"]["variance_ratio"]),
366371
np.abs(default.uns["pca"]["variance_ratio"]),
367-
rtol=1e-6,
372+
rtol=rtol,
368373
)
369374

370375

0 commit comments

Comments
 (0)