diff --git a/docs/conf.py b/docs/conf.py index 56a1af45d8..8606c87b83 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -136,6 +136,7 @@ cycler=("https://matplotlib.org/cycler/", None), dask=("https://docs.dask.org/en/stable/", None), dask_ml=("https://ml.dask.org/", None), + decoupler=("https://decoupler.readthedocs.io/en/stable/", None), fast_array_utils=( "https://icb-fast-array-utils.readthedocs-hosted.com/en/stable/", None, @@ -150,6 +151,7 @@ networkx=("https://networkx.org/documentation/stable/", None), numpy=("https://numpy.org/doc/stable/", None), pandas=("https://pandas.pydata.org/pandas-docs/stable/", None), + pydeseq2=("https://pydeseq2.readthedocs.io/en/stable/", None), pynndescent=("https://pynndescent.readthedocs.io/en/latest/", None), pytest=("https://docs.pytest.org/en/latest/", None), python=( diff --git a/docs/references.bib b/docs/references.bib index f20757c39c..74453033a4 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -829,6 +829,21 @@ @article{Soneson2018 pages = {255--261}, } +@article{Squair2021, + author = {Squair, Jordan W. and Gautier, Matthieu and Kathe, Claudia and Anderson, Mark A. and James, Nicholas D. and Hutson, Thomas H. and Hudelle, Rémi and Qaiser, Taha and Matson, Kaya J. E. and Barraud, Quentin and Levine, Ariel J. and La Manno, Gioele and Skinnider, Michael A. and Courtine, Grégoire}, + title = {Confronting false discoveries in single-cell differential expression}, + volume = {12}, + issn = {2041-1723}, + url = {https://doi.org/10.1038/s41467-021-25960-2}, + doi = {10.1038/s41467-021-25960-2}, + number = {1}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media LLC}, + year = {2021}, + month = {sep}, + pages = {5692}, +} + @article{Stuart2019, author = {Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck, William M. 
and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul}, title = {Comprehensive Integration of Single-Cell Data}, diff --git a/docs/release-notes/3700.feat.md b/docs/release-notes/3700.feat.md index c20e5be4c9..9fe24424ac 100644 --- a/docs/release-notes/3700.feat.md +++ b/docs/release-notes/3700.feat.md @@ -1 +1 @@ -Make {func}`scanpy.get.aggregate` `dask` compatible with all aggregations except median. {smaller}`I Gold` +Make {func}`scanpy.get.aggregate` {doc}`dask:index` compatible with all aggregations except median. {smaller}`I Gold` diff --git a/docs/release-notes/3792.docs.md b/docs/release-notes/3792.docs.md new file mode 100644 index 0000000000..2bf669b431 --- /dev/null +++ b/docs/release-notes/3792.docs.md @@ -0,0 +1 @@ +Add a warning message to {func}`~scanpy.tl.rank_genes_groups` that it is not appropriate for most use-cases. diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index a4da5ae0f9..5e8950be5e 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -526,6 +526,15 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 Expects logarithmized data. + .. warning:: + + Comparing between cells leads to highly inflated significance (spuriously small p-values), + since cells are not independent observations :cite:p:`Squair2021`. + Especially for single-cell data, consider using more appropriate methods instead, such as combining pseudobulking with :doc:`pydeseq2:index`. + + :func:`decoupler.pp.pseudobulk` or :func:`scanpy.get.aggregate` can be used to aggregate samples for pseudobulking. + The latter is a bit more verbose, but supports :doc:`dask:index` arrays for improved performance. 
+ Parameters ---------- adata diff --git a/tests/test_pca.py b/tests/test_pca.py index 5e68977f32..9fdbd98222 100644 --- a/tests/test_pca.py +++ b/tests/test_pca.py @@ -341,7 +341,7 @@ def test_pca_reproducible(array_type): assert not np.array_equal(a.obsm["X_pca"], c.obsm["X_pca"]) -def test_pca_chunked(): +def test_pca_chunked() -> None: """Tests that chunked PCA is equivalent to default PCA. See also @@ -354,17 +354,22 @@ def test_pca_chunked(): default = sc.pp.pca(pbmc_full, copy=True) # Taking absolute value since sometimes dimensions are flipped + rtol = 1e-6 np.testing.assert_allclose( - np.abs(chunked.obsm["X_pca"]), np.abs(default.obsm["X_pca"]) + np.abs(chunked.obsm["X_pca"]), np.abs(default.obsm["X_pca"]), rtol=rtol ) - np.testing.assert_allclose(np.abs(chunked.varm["PCs"]), np.abs(default.varm["PCs"])) np.testing.assert_allclose( - np.abs(chunked.uns["pca"]["variance"]), np.abs(default.uns["pca"]["variance"]) + np.abs(chunked.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=rtol + ) + np.testing.assert_allclose( + np.abs(chunked.uns["pca"]["variance"]), + np.abs(default.uns["pca"]["variance"]), + rtol=rtol, ) np.testing.assert_allclose( np.abs(chunked.uns["pca"]["variance_ratio"]), np.abs(default.uns["pca"]["variance_ratio"]), - rtol=1e-6, + rtol=rtol, )