From 5eb83a4d969e43fec415fd88da8147cc07fd60e6 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Wed, 28 May 2025 09:34:44 +0200 Subject: [PATCH 01/42] WIP presets --- docs/api/settings.md | 20 +++++----- docs/extensions/settings_singleton.py | 40 +++++++++++++++++++ src/scanpy/__init__.py | 1 + src/scanpy/_settings.py | 39 +++++++++++++++--- src/scanpy/datasets/_datasets.py | 2 +- src/scanpy/datasets/_ebi_expression_atlas.py | 2 +- src/scanpy/external/pp/_mnn_correct.py | 2 +- src/scanpy/logging.py | 6 +-- src/scanpy/neighbors/_doc.py | 2 +- .../preprocessing/_highly_variable_genes.py | 22 ++++++---- src/scanpy/preprocessing/_simple.py | 2 +- src/scanpy/tools/_tsne.py | 2 +- 12 files changed, 107 insertions(+), 33 deletions(-) create mode 100644 docs/extensions/settings_singleton.py diff --git a/docs/api/settings.md b/docs/api/settings.md index 32eb70f3a9..a622041a65 100644 --- a/docs/api/settings.md +++ b/docs/api/settings.md @@ -18,14 +18,14 @@ high-resolution jupyter display backend useful for use in notebooks. set_figure_params ``` -An instance of the {class}`~scanpy._settings.ScanpyConfig` is available as `scanpy.settings` and allows configuring Scanpy. +An object that allows configuring Scanpy. ```{eval-rst} .. autosummary:: :nosignatures: :toctree: ../generated/ - _settings.ScanpyConfig + settings ``` Some selected settings are discussed in the following. @@ -39,8 +39,8 @@ you'd usually want to set `settings.autoshow` to `False`. .. autosummary:: :nosignatures: - ~_settings.ScanpyConfig.autoshow - ~_settings.ScanpyConfig.autosave + ~settings.autoshow + ~settings.autosave ``` IO related settings for saving figures, caching files and storing datasets. @@ -50,11 +50,11 @@ IO related settings for saving figures, caching files and storing datasets. .. 
autosummary:: :nosignatures: - ~_settings.ScanpyConfig.figdir - ~_settings.ScanpyConfig.cachedir - ~_settings.ScanpyConfig.datasetdir - ~_settings.ScanpyConfig.file_format_figs - ~_settings.ScanpyConfig.file_format_data + ~settings.figdir + ~settings.cachedir + ~settings.datasetdir + ~settings.file_format_figs + ~settings.file_format_data ``` The verbosity of logging output, where verbosity levels have the following @@ -65,7 +65,7 @@ details, etc. .. autosummary:: :nosignatures: - ~_settings.ScanpyConfig.verbosity + ~settings.verbosity ``` Print versions of packages that might influence numerical results. diff --git a/docs/extensions/settings_singleton.py b/docs/extensions/settings_singleton.py new file mode 100644 index 0000000000..4392b07441 --- /dev/null +++ b/docs/extensions/settings_singleton.py @@ -0,0 +1,40 @@ +"""Extension to warn about numpydoc-style parameter types in docstrings.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from sphinx.ext import autosummary + +if TYPE_CHECKING: + from collections.abc import Sequence + from typing import Any + + from sphinx.application import Sphinx + + +import_by_name = autosummary.import_by_name + + +def _patched_import_by_name( + name: str, + prefixes: Sequence[str | None] = (None,), +) -> tuple[str, Any, Any, str]: + if name.startswith("scanpy.settings"): + prefixed_name, obj, parent, modname = import_by_name( + name.replace("scanpy.settings", "scanpy._settings.Settings"), prefixes + ) + prefixed_name = prefixed_name.replace( + "scanpy._settings.Settings", "scanpy.settings" + ) + if parent.__name__ == "scanpy._settings": + parent = import_by_name("scanpy")[1] + + return prefixed_name, obj, parent, "scanpy" + + return import_by_name(name, prefixes) + + +def setup(app: Sphinx) -> None: + """App setup hook.""" + autosummary.import_by_name = _patched_import_by_name diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index 9212318f6a..f772ceeec4 100644 --- 
a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +import os import sys from packaging.version import Version diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 9837a9e85b..0f0b046e3b 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -3,7 +3,8 @@ import inspect import sys from contextlib import contextmanager -from enum import IntEnum +from enum import IntEnum, StrEnum, auto +from functools import cached_property from logging import getLevelNamesMapping from pathlib import Path from time import time @@ -17,6 +18,8 @@ from collections.abc import Generator, Iterable from typing import Any, TextIO + from .preprocessing import _highly_variable_genes as hvg + # Collected from the print_* functions in matplotlib.backends _Format = ( Literal["png", "jpg", "tif", "tiff"] # noqa: PYI030 @@ -30,6 +33,19 @@ AnnDataFileFormat = Literal["h5ad", "zarr"] +class Preset(StrEnum): + ScanpyV1 = auto() + SeuratV5 = auto() + + @cached_property + def highly_variable_genes(self) -> hvg.Flavor: + match self: + case Preset.ScanpyV1: + return "seurat" + case Preset.SeuratV5: + return "seurat_v3" + + _VERBOSITY_TO_LOGLEVEL: dict[int | _VerbosityName, _LoggingLevelName] = { "error": "ERROR", "warning": "WARNING", @@ -89,8 +105,8 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]) -> None: raise TypeError(msg) -class ScanpyConfig: - """Config manager for scanpy.""" +class Settings: + """Settings manager for scanpy.""" N_PCS: int """Default number of principal components to use.""" @@ -98,6 +114,7 @@ class ScanpyConfig: def __init__( # noqa: PLR0913 self, *, + preset: Preset = Preset.ScanpyV1, verbosity: Verbosity | _VerbosityName | int = Verbosity.warning, plot_suffix: str = "", file_format_data: AnnDataFileFormat = "h5ad", @@ -117,7 +134,8 @@ def __init__( # noqa: PLR0913 _vector_friendly: bool = False, _low_resolution_warning: bool = True, n_pcs: int = 50, - ): + ) -> None: + 
self.preset = preset # logging self._root_logger = _RootLogger(logging.INFO) # level will be replaced self.logfile = logfile @@ -156,6 +174,15 @@ def __init__( # noqa: PLR0913 self.N_PCS = n_pcs + @property + def preset(self) -> Preset: + """Preset to use.""" + return self._preset + + @preset.setter + def preset(self, preset: Preset | str) -> None: + self._preset = Preset(preset) + @property def verbosity(self) -> Verbosity: """Verbosity level (default `warning`). @@ -232,7 +259,7 @@ def file_format_figs(self, figure_format: str) -> None: @property def autosave(self) -> bool: - """Automatically save figures in :attr:`~scanpy._settings.ScanpyConfig.figdir` (default `False`). + """Automatically save figures in :attr:`~scanpy.settings.figdir` (default `False`). Do not show plots/figures interactively. """ @@ -504,4 +531,4 @@ def __str__(self) -> str: ) -settings = ScanpyConfig() +settings = Settings() diff --git a/src/scanpy/datasets/_datasets.py b/src/scanpy/datasets/_datasets.py index 9f62b8a6d6..2b3d4516e3 100644 --- a/src/scanpy/datasets/_datasets.py +++ b/src/scanpy/datasets/_datasets.py @@ -375,7 +375,7 @@ def pbmc3k() -> AnnData: .. note:: This downloads 5.9 MB of data upon the first call of the function and stores it in - :attr:`~scanpy._settings.ScanpyConfig.datasetdir`\ `/pbmc3k_raw.h5ad`. + :attr:`~scanpy.settings.datasetdir`\ `/pbmc3k_raw.h5ad`. The following code was run to produce the file. diff --git a/src/scanpy/datasets/_ebi_expression_atlas.py b/src/scanpy/datasets/_ebi_expression_atlas.py index 824fae3c6a..da244151bc 100644 --- a/src/scanpy/datasets/_ebi_expression_atlas.py +++ b/src/scanpy/datasets/_ebi_expression_atlas.py @@ -121,7 +121,7 @@ def ebi_expression_atlas( The atlas_ can be browsed online to find the ``accession`` you want. Downloaded datasets are saved in the directory specified by - :attr:`~scanpy._settings.ScanpyConfig.datasetdir`. + :attr:`~scanpy.settings.datasetdir`. .. 
_atlas: https://www.ebi.ac.uk/gxa/sc/experiments diff --git a/src/scanpy/external/pp/_mnn_correct.py b/src/scanpy/external/pp/_mnn_correct.py index c702ab7f58..53fbd93170 100644 --- a/src/scanpy/external/pp/_mnn_correct.py +++ b/src/scanpy/external/pp/_mnn_correct.py @@ -111,7 +111,7 @@ def mnn_correct( # noqa: PLR0913 :attr:`~anndata.AnnData.raw` attribute. n_jobs The number of jobs. When set to `None`, automatically uses - :attr:`scanpy._settings.ScanpyConfig.n_jobs`. + :attr:`scanpy.settings.n_jobs`. kwargs optional keyword arguments for irlb. diff --git a/src/scanpy/logging.py b/src/scanpy/logging.py index ea063af75d..9d4ba85d15 100644 --- a/src/scanpy/logging.py +++ b/src/scanpy/logging.py @@ -18,7 +18,7 @@ from session_info2 import SessionInfo - from ._settings import ScanpyConfig + from ._settings import Settings # This is currently the only documented API @@ -74,7 +74,7 @@ def debug(self, msg, *, time=None, deep=None, extra=None) -> datetime: return self.log(DEBUG, msg, time=time, deep=deep, extra=extra) -def _set_log_file(settings: ScanpyConfig): +def _set_log_file(settings: Settings): file = settings.logfile name = settings.logpath root = settings._root_logger @@ -86,7 +86,7 @@ def _set_log_file(settings: ScanpyConfig): root.addHandler(h) -def _set_log_level(settings: ScanpyConfig, level: int): +def _set_log_level(settings: Settings, level: int): root = settings._root_logger root.setLevel(level) for h in list(root.handlers): diff --git a/src/scanpy/neighbors/_doc.py b/src/scanpy/neighbors/_doc.py index 28bcddca02..ad14654343 100644 --- a/src/scanpy/neighbors/_doc.py +++ b/src/scanpy/neighbors/_doc.py @@ -4,7 +4,7 @@ use_rep Use the indicated representation. `'X'` or any key for `.obsm` is valid. If `None`, the representation is chosen automatically: - For `.n_vars` < :attr:`~scanpy._settings.ScanpyConfig.N_PCS` (default: 50), `.X` is used, otherwise 'X_pca' is used. 
+ For `.n_vars` < :attr:`~scanpy.settings.N_PCS` (default: 50), `.X` is used, otherwise 'X_pca' is used. If 'X_pca' is not present, it’s computed with default parameters or `n_pcs` if present.\ """ diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 6cc68f82f0..7c7c7224ef 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -3,7 +3,7 @@ import warnings from dataclasses import dataclass from inspect import signature -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, Literal, cast import numba import numpy as np @@ -20,15 +20,16 @@ from ._simple import filter_genes if TYPE_CHECKING: - from typing import Literal - from numpy.typing import NDArray +Flavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] + + def _highly_variable_genes_seurat_v3( # noqa: PLR0912, PLR0915 adata: AnnData, *, - flavor: str = "seurat_v3", + flavor: Literal["seurat_v3", "seurat_v3_paper"] = "seurat_v3", layer: str | None = None, n_top_genes: int = 2000, batch_key: str | None = None, @@ -526,7 +527,7 @@ def highly_variable_genes( # noqa: PLR0913 max_mean: float = 3, span: float = 0.3, n_bins: int = 20, - flavor: Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] = "seurat", + flavor: Flavor | None = None, subset: bool = False, inplace: bool = True, batch_key: str | None = None, @@ -595,9 +596,9 @@ def highly_variable_genes( # noqa: PLR0913 the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor - Choose the flavor for identifying highly variable genes. For the dispersion - based methods in their default workflows, Seurat passes the cutoffs whereas - Cell Ranger passes `n_top_genes`. + Choose the flavor for identifying highly variable genes (default depends on :preset:`highly_variable_genes`). 
+ For the dispersion based methods in their default workflows, + `'seurat'` passes the cutoffs whereas `'cell_ranger'` passes `n_top_genes`. subset Inplace subset to highly-variable genes if `True` otherwise merely indicate highly variable genes. @@ -644,6 +645,11 @@ def highly_variable_genes( # noqa: PLR0913 This function replaces :func:`~scanpy.pp.filter_genes_dispersion`. """ + if flavor is None: + from .. import settings + + flavor = settings.preset.highly_variable_genes + start = logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 91eb6b986c..a8fb274203 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -694,7 +694,7 @@ def regress_out( If provided, which element of layers to regress on. n_jobs Number of jobs for parallel computation. - `None` means using :attr:`scanpy._settings.ScanpyConfig.n_jobs`. + `None` means using :attr:`scanpy.settings.n_jobs`. copy Determines whether a copy of `adata` is returned. diff --git a/src/scanpy/tools/_tsne.py b/src/scanpy/tools/_tsne.py index 83a34f5e55..96eda7888e 100644 --- a/src/scanpy/tools/_tsne.py +++ b/src/scanpy/tools/_tsne.py @@ -90,7 +90,7 @@ def tsne( # noqa: PLR0913 If `None`, the initial state is not reproducible. n_jobs Number of jobs for parallel computation. - `None` means using :attr:`scanpy._settings.ScanpyConfig.n_jobs`. + `None` means using :attr:`scanpy.settings.n_jobs`. key_added If not specified, the embedding is stored as :attr:`~anndata.AnnData.obsm`\ `['X_tsne']` and the the parameters in From df3c3a61c2b21ecfc9bfdd99bdaff6c0c2624d6e Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Mon, 2 Jun 2025 17:58:09 +0200 Subject: [PATCH 02/42] notebooks --- notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks b/notebooks index 8a5c354ef2..a883f10ed4 160000 --- a/notebooks +++ b/notebooks @@ -1 +1 @@ -Subproject commit 8a5c354ef24ea1f233cfa15512df101f833bea09 +Subproject commit a883f10ed412bd45c27e665a3e26069d89b31418 From db4c0d379e59f2a0c314e5a0f85ad139641e106c Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Mon, 2 Jun 2025 19:12:48 +0200 Subject: [PATCH 03/42] introduce singleton code --- docs/conf.py | 2 + docs/extensions/settings_singleton.py | 40 ---- src/scanpy/_settings.py | 301 +++++++++++++------------- src/scanpy/_singleton.py | 67 ++++++ src/scanpy/_utils/__init__.py | 3 +- src/scanpy/logging.py | 6 +- 6 files changed, 226 insertions(+), 193 deletions(-) delete mode 100644 docs/extensions/settings_singleton.py create mode 100644 src/scanpy/_singleton.py diff --git a/docs/conf.py b/docs/conf.py index 2e39451a1f..9809dba5c0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -2,6 +2,7 @@ from __future__ import annotations +import os import sys from datetime import datetime from functools import partial @@ -17,6 +18,7 @@ HERE = Path(__file__).parent sys.path[:0] = [str(HERE.parent), str(HERE / "extensions")] +os.environ["SPHINX_RUNNING"] = "1" # for scanpy._singleton import scanpy if TYPE_CHECKING: diff --git a/docs/extensions/settings_singleton.py b/docs/extensions/settings_singleton.py deleted file mode 100644 index 4392b07441..0000000000 --- a/docs/extensions/settings_singleton.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Extension to warn about numpydoc-style parameter types in docstrings.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from sphinx.ext import autosummary - -if TYPE_CHECKING: - from collections.abc import Sequence - from typing import Any - - from sphinx.application import Sphinx - - -import_by_name = autosummary.import_by_name - - -def _patched_import_by_name( - 
name: str, - prefixes: Sequence[str | None] = (None,), -) -> tuple[str, Any, Any, str]: - if name.startswith("scanpy.settings"): - prefixed_name, obj, parent, modname = import_by_name( - name.replace("scanpy.settings", "scanpy._settings.Settings"), prefixes - ) - prefixed_name = prefixed_name.replace( - "scanpy._settings.Settings", "scanpy.settings" - ) - if parent.__name__ == "scanpy._settings": - parent = import_by_name("scanpy")[1] - - return prefixed_name, obj, parent, "scanpy" - - return import_by_name(name, prefixes) - - -def setup(app: Sphinx) -> None: - """App setup hook.""" - autosummary.import_by_name = _patched_import_by_name diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 0f0b046e3b..3073ca7234 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -12,11 +12,12 @@ from . import logging from ._compat import old_positionals +from ._singleton import SingletonMeta from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: from collections.abc import Generator, Iterable - from typing import Any, TextIO + from typing import Any, ClassVar, Self, TextIO from .preprocessing import _highly_variable_genes as hvg @@ -105,86 +106,52 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]) -> None: raise TypeError(msg) -class Settings: - """Settings manager for scanpy.""" - +class SettingsMeta(SingletonMeta): + _preset: Preset + # logging + _root_logger: _RootLogger + _logfile: TextIO | None + _verbosity: Verbosity + # rest N_PCS: int """Default number of principal components to use.""" - - def __init__( # noqa: PLR0913 - self, - *, - preset: Preset = Preset.ScanpyV1, - verbosity: Verbosity | _VerbosityName | int = Verbosity.warning, - plot_suffix: str = "", - file_format_data: AnnDataFileFormat = "h5ad", - file_format_figs: str = "pdf", - autosave: bool = False, - autoshow: bool = True, - writedir: Path | str = "./write/", - cachedir: Path | str = "./cache/", - datasetdir: Path | str = 
"./data/", - figdir: Path | str = "./figures/", - cache_compression: str | None = "lzf", - max_memory: int = 15, - n_jobs: int = 1, - logfile: Path | str | None = None, - categories_to_ignore: Iterable[str] = ("N/A", "dontknow", "no_gate", "?"), - _frameon: bool = True, - _vector_friendly: bool = False, - _low_resolution_warning: bool = True, - n_pcs: int = 50, - ) -> None: - self.preset = preset - # logging - self._root_logger = _RootLogger(logging.INFO) # level will be replaced - self.logfile = logfile - self.verbosity = verbosity - # rest - self.plot_suffix = plot_suffix - self.file_format_data = file_format_data - self.file_format_figs = file_format_figs - self.autosave = autosave - self.autoshow = autoshow - self.writedir = writedir - self.cachedir = cachedir - self.datasetdir = datasetdir - self.figdir = figdir - self.cache_compression = cache_compression - self.max_memory = max_memory - self.n_jobs = n_jobs - self.categories_to_ignore = categories_to_ignore - self._frameon = _frameon - """bool: See set_figure_params.""" - - self._vector_friendly = _vector_friendly - """Set to true if you want to include pngs in svgs and pdfs.""" - - self._low_resolution_warning = _low_resolution_warning - """Print warning when saving a figure with low resolution.""" - - self._start = time() - """Time when the settings module is first imported.""" - - self._previous_time = self._start - """Variable for timing program parts.""" - - self._previous_memory_usage = -1 - """Stores the previous memory usage.""" - - self.N_PCS = n_pcs + _plot_suffix: str + _file_format_data: AnnDataFileFormat + _file_format_figs: str + _autosave: bool + _autoshow: bool + _writedir: Path + _cachedir: Path + _datasetdir: Path + _figdir: Path + _cache_compression: str | None + _max_memory: float + _n_jobs: int + _categories_to_ignore: list[str] + _frameon: bool + """See set_figure_params.""" + _vector_friendly: bool + """Set to true if you want to include pngs in svgs and pdfs.""" + 
_low_resolution_warning: bool + """Print warning when saving a figure with low resolution.""" + _start: float + """Time when the settings module is first imported.""" + _previous_time: float + """Variable for timing program parts.""" + _previous_memory_usage: int + """Stores the previous memory usage.""" @property - def preset(self) -> Preset: + def preset(cls) -> Preset: """Preset to use.""" - return self._preset + return cls._preset @preset.setter - def preset(self, preset: Preset | str) -> None: - self._preset = Preset(preset) + def preset(cls, preset: Preset | str) -> None: + cls._preset = Preset(preset) @property - def verbosity(self) -> Verbosity: + def verbosity(cls) -> Verbosity: """Verbosity level (default `warning`). Level 0: only show 'error' messages. @@ -193,17 +160,17 @@ def verbosity(self) -> Verbosity: Level 3: also show 'hint' messages. Level 4: also show very detailed progress for 'debug'ging. """ - return self._verbosity + return cls._verbosity @verbosity.setter - def verbosity(self, verbosity: Verbosity | _VerbosityName | int) -> None: + def verbosity(cls, verbosity: Verbosity | _VerbosityName | int) -> None: verbosity_str_options: list[_VerbosityName] = [ v for v in _VERBOSITY_TO_LOGLEVEL if isinstance(v, str) ] if isinstance(verbosity, Verbosity): - self._verbosity = verbosity + cls._verbosity = verbosity elif isinstance(verbosity, int): - self._verbosity = Verbosity(verbosity) + cls._verbosity = Verbosity(verbosity) elif isinstance(verbosity, str): verbosity = verbosity.lower() if verbosity not in verbosity_str_options: @@ -212,28 +179,28 @@ def verbosity(self, verbosity: Verbosity | _VerbosityName | int) -> None: f"Accepted string values are: {verbosity_str_options}" ) raise ValueError(msg) - self._verbosity = Verbosity(verbosity_str_options.index(verbosity)) + cls._verbosity = Verbosity(verbosity_str_options.index(verbosity)) else: _type_check(verbosity, "verbosity", (str, int)) - _set_log_level(self, 
_VERBOSITY_TO_LOGLEVEL[self._verbosity.name]) + _set_log_level(cls, _VERBOSITY_TO_LOGLEVEL[cls._verbosity.name]) @property - def plot_suffix(self) -> str: + def plot_suffix(cls) -> str: """Global suffix that is appended to figure filenames.""" - return self._plot_suffix + return cls._plot_suffix @plot_suffix.setter - def plot_suffix(self, plot_suffix: str) -> None: + def plot_suffix(cls, plot_suffix: str) -> None: _type_check(plot_suffix, "plot_suffix", str) - self._plot_suffix = plot_suffix + cls._plot_suffix = plot_suffix @property - def file_format_data(self) -> AnnDataFileFormat: + def file_format_data(cls) -> AnnDataFileFormat: """File format for saving AnnData objects.""" - return self._file_format_data + return cls._file_format_data @file_format_data.setter - def file_format_data(self, file_format: AnnDataFileFormat) -> None: + def file_format_data(cls, file_format: AnnDataFileFormat) -> None: _type_check(file_format, "file_format_data", str) if file_format not in (file_format_options := get_args(AnnDataFileFormat)): msg = ( @@ -241,16 +208,16 @@ def file_format_data(self, file_format: AnnDataFileFormat) -> None: f"Must be one of {file_format_options}" ) raise ValueError(msg) - self._file_format_data: AnnDataFileFormat = file_format + cls._file_format_data: AnnDataFileFormat = file_format @property - def file_format_figs(self) -> str: + def file_format_figs(cls) -> str: """File format for saving figures. For example `'png'`, `'pdf'` or `'svg'`. Many other formats work as well (see `matplotlib.pyplot.savefig`). """ - return self._file_format_figs + return cls._file_format_figs @file_format_figs.setter def file_format_figs(self, figure_format: str) -> None: @@ -258,131 +225,135 @@ def file_format_figs(self, figure_format: str) -> None: self._file_format_figs = figure_format @property - def autosave(self) -> bool: + def autosave(cls) -> bool: """Automatically save figures in :attr:`~scanpy.settings.figdir` (default `False`). 
Do not show plots/figures interactively. """ - return self._autosave + return cls._autosave @autosave.setter - def autosave(self, autosave: bool) -> None: + def autosave(cls, autosave: bool) -> None: _type_check(autosave, "autosave", bool) - self._autosave = autosave + cls._autosave = autosave @property - def autoshow(self) -> bool: + def autoshow(cls) -> bool: """Automatically show figures if `autosave == False` (default `True`). There is no need to call the matplotlib pl.show() in this case. """ - return self._autoshow + return cls._autoshow @autoshow.setter - def autoshow(self, autoshow: bool) -> None: + def autoshow(cls, autoshow: bool) -> None: _type_check(autoshow, "autoshow", bool) - self._autoshow = autoshow + cls._autoshow = autoshow @property - def writedir(self) -> Path: + def writedir(cls) -> Path: """Directory where the function scanpy.write writes to by default.""" - return self._writedir + return cls._writedir @writedir.setter - def writedir(self, writedir: Path | str) -> None: + def writedir(cls, writedir: Path | str) -> None: _type_check(writedir, "writedir", (str, Path)) - self._writedir = Path(writedir) + cls._writedir = Path(writedir) @property - def cachedir(self) -> Path: + def cachedir(cls) -> Path: """Directory for cache files (default `'./cache/'`).""" - return self._cachedir + return cls._cachedir @cachedir.setter - def cachedir(self, cachedir: Path | str) -> None: + def cachedir(cls, cachedir: Path | str) -> None: _type_check(cachedir, "cachedir", (str, Path)) - self._cachedir = Path(cachedir) + cls._cachedir = Path(cachedir) @property - def datasetdir(self) -> Path: + def datasetdir(cls) -> Path: """Directory for example :mod:`~scanpy.datasets` (default `'./data/'`).""" - return self._datasetdir + return cls._datasetdir @datasetdir.setter - def datasetdir(self, datasetdir: Path | str) -> None: + def datasetdir(cls, datasetdir: Path | str) -> None: _type_check(datasetdir, "datasetdir", (str, Path)) - self._datasetdir = 
Path(datasetdir).resolve() + cls._datasetdir = Path(datasetdir).resolve() @property - def figdir(self) -> Path: + def figdir(cls) -> Path: """Directory for saving figures (default `'./figures/'`).""" - return self._figdir + return cls._figdir @figdir.setter - def figdir(self, figdir: Path | str) -> None: + def figdir(cls, figdir: Path | str) -> None: _type_check(figdir, "figdir", (str, Path)) - self._figdir = Path(figdir) + cls._figdir = Path(figdir) @property - def cache_compression(self) -> str | None: + def cache_compression(cls) -> str | None: """Compression for `sc.read(..., cache=True)` (default `'lzf'`). May be `'lzf'`, `'gzip'`, or `None`. """ - return self._cache_compression + return cls._cache_compression @cache_compression.setter - def cache_compression(self, cache_compression: str | None) -> None: + def cache_compression(cls, cache_compression: str | None) -> None: if cache_compression not in {"lzf", "gzip", None}: msg = ( f"`cache_compression` ({cache_compression}) " "must be in {'lzf', 'gzip', None}" ) raise ValueError(msg) - self._cache_compression = cache_compression + cls._cache_compression = cache_compression @property - def max_memory(self) -> int | float: + def max_memory(cls) -> int | float: """Maximum memory usage in Gigabyte. Is currently not well respected… """ - return self._max_memory + return cls._max_memory @max_memory.setter - def max_memory(self, max_memory: float) -> None: + def max_memory(cls, max_memory: float) -> None: _type_check(max_memory, "max_memory", (int, float)) - self._max_memory = max_memory + cls._max_memory = max_memory @property - def n_jobs(self) -> int: + def n_jobs(cls) -> int: """Default number of jobs/ CPUs to use for parallel computing. Set to `-1` in order to use all available cores. Not all algorithms support special behavior for numbers < `-1`, so make sure to leave this setting as >= `-1`. 
""" - return self._n_jobs + return cls._n_jobs @n_jobs.setter - def n_jobs(self, n_jobs: int) -> None: + def n_jobs(cls, n_jobs: int) -> None: _type_check(n_jobs, "n_jobs", int) - self._n_jobs = n_jobs + cls._n_jobs = n_jobs @property - def logpath(self) -> Path | None: + def logpath(cls) -> Path | None: """The file path `logfile` was set to.""" - return self._logpath + return cls._logpath @logpath.setter - def logpath(self, logpath: Path | str | None) -> None: + def logpath(cls, logpath: Path | str | None) -> None: _type_check(logpath, "logfile", (str, Path)) + if logpath is None: + cls._logfile = None + cls._logpath = None + return # set via “file object” branch of logfile.setter - self.logfile = Path(logpath).open("a") # noqa: SIM115 - self._logpath = Path(logpath) + cls.logfile = Path(logpath).open("a") # noqa: SIM115 + cls._logpath = Path(logpath) @property - def logfile(self) -> TextIO: + def logfile(cls) -> TextIO | None: """The open file to write logs to. Set it to a :class:`~pathlib.Path` or :class:`str` to open a new one. @@ -391,30 +362,30 @@ def logfile(self) -> TextIO: For backwards compatibility, setting it to `''` behaves like setting it to `None`. 
""" - return self._logfile + return cls._logfile @logfile.setter - def logfile(self, logfile: Path | str | TextIO | None) -> None: + def logfile(cls, logfile: Path | str | TextIO | None) -> None: if not hasattr(logfile, "write") and logfile: - self.logpath = logfile + cls.logpath = logfile else: # file object if not logfile: # None or '' - logfile = sys.stdout if self._is_run_from_ipython() else sys.stderr - self._logfile = logfile - self._logpath = None - _set_log_file(self) + logfile = sys.stdout if cls._is_run_from_ipython() else sys.stderr + cls._logfile = logfile + cls._logpath = None + _set_log_file(cls) @property - def categories_to_ignore(self) -> list[str]: + def categories_to_ignore(cls) -> list[str]: """Categories that are omitted in plotting etc.""" - return self._categories_to_ignore + return cls._categories_to_ignore @categories_to_ignore.setter - def categories_to_ignore(self, categories_to_ignore: Iterable[str]) -> None: + def categories_to_ignore(cls, categories_to_ignore: Iterable[str]) -> None: categories_to_ignore = list(categories_to_ignore) for i, cat in enumerate(categories_to_ignore): _type_check(cat, f"categories_to_ignore[{i}]", str) - self._categories_to_ignore = categories_to_ignore + cls._categories_to_ignore = categories_to_ignore # -------------------------------------------------------------------------------- # Functions @@ -435,7 +406,7 @@ def categories_to_ignore(self, categories_to_ignore: Iterable[str]) -> None: "ipython_format", ) def set_figure_params( # noqa: PLR0913 - self, + cls, *, scanpy: bool = True, dpi: int = 80, @@ -486,7 +457,7 @@ def set_figure_params( # noqa: PLR0913 for details. 
""" - if self._is_run_from_ipython(): + if cls._is_run_from_ipython(): # No docs yet: https://github.com/ipython/matplotlib-inline/issues/12 from matplotlib_inline.backend_inline import set_matplotlib_formats @@ -497,8 +468,8 @@ def set_figure_params( # noqa: PLR0913 from matplotlib import rcParams - self._vector_friendly = vector_friendly - self.file_format_figs = format + cls._vector_friendly = vector_friendly + cls.file_format_figs = format if dpi is not None: rcParams["figure.dpi"] = dpi if dpi_save is not None: @@ -514,7 +485,7 @@ def set_figure_params( # noqa: PLR0913 set_rcParams_scanpy(fontsize=fontsize, color_map=color_map) if figsize is not None: rcParams["figure.figsize"] = figsize - self._frameon = frameon + cls._frameon = frameon @staticmethod def _is_run_from_ipython() -> bool: @@ -523,12 +494,44 @@ def _is_run_from_ipython() -> bool: return getattr(builtins, "__IPYTHON__", False) - def __str__(self) -> str: + def __str__(cls) -> str: return "\n".join( f"{k} = {v!r}" - for k, v in inspect.getmembers(self) + for k, v in inspect.getmembers(cls) if not k.startswith("_") and k != "getdoc" ) -settings = Settings() +class settings(metaclass=SettingsMeta): + """Settings for scanpy.""" + + def __new__(cls) -> type[Self]: + return cls + + _preset = Preset.ScanpyV1 + # logging + _root_logger: ClassVar = _RootLogger(logging.INFO) + _logfile: ClassVar = None + _logpath: ClassVar = None + _verbosity: ClassVar = Verbosity.warning + # rest + N_PCS: ClassVar = 50 + _plot_suffix: ClassVar = "" + _file_format_data: ClassVar = "h5ad" + _file_format_figs: ClassVar = "pdf" + _autosave: ClassVar = False + _autoshow: ClassVar = True + _writedir: ClassVar = Path("./write") + _cachedir: ClassVar = Path("./cache") + _datasetdir: ClassVar = Path("./data") + _figdir: ClassVar = Path("./figures") + _cache_compression: ClassVar = "lzf" + _max_memory: ClassVar = 15 + _n_jobs: ClassVar = 1 + _categories_to_ignore: ClassVar = ["N/A", "dontknow", "no_gate", "?"] + _frameon: ClassVar = 
True + _vector_friendly: ClassVar = False + _low_resolution_warning: ClassVar = True + _start: ClassVar = time() + _previous_time: ClassVar = _start + _previous_memory_usage: ClassVar = -1 diff --git a/src/scanpy/_singleton.py b/src/scanpy/_singleton.py new file mode 100644 index 0000000000..60fdd5f247 --- /dev/null +++ b/src/scanpy/_singleton.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import os +from traceback import extract_stack +from types import FunctionType, MethodType + + +def documenting() -> bool: + """Return whether this is being called from Sphinx. + + (but not when e.g. a log message is accessing `settings.verbosity`) + """ + if not os.environ.get("SPHINX_RUNNING"): + return False + found = False + logging = False + for frame in extract_stack(): + # Let any sphinx ext get the docstring + if frame.name in { + "eval_config_file", # Sphinx import + "generate_autosummary_docs", # Autosummary generator + # "parse_generated_content", # Autodoc parser + "get_object_members", # Class level of autodoc + "import_object", # Attr level of autodoc + }: + found = True + if frame.filename.endswith("/logging.py"): + logging = True + return found and not logging + + +class SingletonMeta(type): + def __new__(mcls, cls_name: str, *args, **kwargs): + cls = super().__new__(mcls, cls_name, *args, **kwargs) + + # We do something differently when we are imported by autosummary. + if documenting(): + props = {} + for name in dir(cls): + if name.startswith("_") or name in cls.__dict__: + continue + attr = getattr(mcls, name) + if isinstance(attr, FunctionType | MethodType): + # Circumvent https://github.com/tox-dev/sphinx-autodoc-typehints/pull/157 + setattr(cls, name, getattr(cls, name)) + if name not in cls.__dict__ and isinstance(attr, property): + # Allow autosummary to access the property, not the value + props[name] = getattr(mcls, name) + + def getattribute(_, name: str) -> object: + """Return property or value depending on whether we are in autosummary. 
+ + If an singleton instance property/method is accessed by autodoc/autosummary, + return the property/method object, not the value/bound method. + """ + if documenting() and name in props: + return props[name] + return object.__getattribute__(cls, name) + + mcls.__getattribute__ = getattribute + + return cls + + def __dir__(cls) -> list[str]: + # Deduplicate preserving order + d = dict.fromkeys(super().__dir__()) | dict.fromkeys(dir(type(cls))) + return [k for k in d if k != "mro"] diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py index 4252f7efb9..513338ff00 100644 --- a/src/scanpy/_utils/__init__.py +++ b/src/scanpy/_utils/__init__.py @@ -207,7 +207,8 @@ def descend_classes_and_funcs(mod: ModuleType, root: str, encountered=None): def annotate_doc_types(mod: ModuleType, root: str): for c_or_f in descend_classes_and_funcs(mod, root): - c_or_f.getdoc = partial(getdoc, c_or_f) + with suppress(AttributeError): + c_or_f.getdoc = partial(getdoc, c_or_f) _leading_whitespace_re = re.compile("(^[ ]*)(?:[^ \n])", re.MULTILINE) diff --git a/src/scanpy/logging.py b/src/scanpy/logging.py index 9d4ba85d15..232062a3fe 100644 --- a/src/scanpy/logging.py +++ b/src/scanpy/logging.py @@ -18,7 +18,7 @@ from session_info2 import SessionInfo - from ._settings import Settings + from ._settings import settings # This is currently the only documented API @@ -74,7 +74,7 @@ def debug(self, msg, *, time=None, deep=None, extra=None) -> datetime: return self.log(DEBUG, msg, time=time, deep=deep, extra=extra) -def _set_log_file(settings: Settings): +def _set_log_file(settings: settings) -> None: file = settings.logfile name = settings.logpath root = settings._root_logger @@ -86,7 +86,7 @@ def _set_log_file(settings: Settings): root.addHandler(h) -def _set_log_level(settings: Settings, level: int): +def _set_log_level(settings: settings, level: int) -> None: root = settings._root_logger root.setLevel(level) for h in list(root.handlers): From 
af89ce5dfa27f1be210f867f3f346a09618b22db Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 3 Jun 2025 15:45:56 +0200 Subject: [PATCH 04/42] works, except for role --- docs/api/deprecated.md | 1 + docs/api/settings.md | 51 ++++++++++++------- docs/conf.py | 7 ++- pyproject.toml | 2 +- src/scanpy/__init__.py | 4 +- src/scanpy/_settings.py | 23 ++++++--- src/scanpy/_types.py | 7 +++ .../preprocessing/_highly_variable_genes.py | 9 ++-- 8 files changed, 71 insertions(+), 33 deletions(-) create mode 100644 src/scanpy/_types.py diff --git a/docs/api/deprecated.md b/docs/api/deprecated.md index d09c1af405..5a1fa82136 100644 --- a/docs/api/deprecated.md +++ b/docs/api/deprecated.md @@ -12,4 +12,5 @@ pp.filter_genes_dispersion pp.normalize_per_cell pp.subsample + logging.print_versions ``` diff --git a/docs/api/settings.md b/docs/api/settings.md index a622041a65..08f4d35b4c 100644 --- a/docs/api/settings.md +++ b/docs/api/settings.md @@ -5,6 +5,16 @@ ```{eval-rst} .. currentmodule:: scanpy + +.. + + This is a comment, yet results in autosummary generating stubs + + .. autosummary:: + :toctree: ../generated/ + + Preset + Verbosity ``` A convenience function for setting some default {obj}`matplotlib.rcParams` and a @@ -12,7 +22,7 @@ high-resolution jupyter display backend useful for use in notebooks. ```{eval-rst} .. autosummary:: - :nosignatures: + :signatures: none :toctree: ../generated/ set_figure_params @@ -22,7 +32,7 @@ An object that allows configuring Scanpy. ```{eval-rst} .. autosummary:: - :nosignatures: + :signatures: none :toctree: ../generated/ settings @@ -30,17 +40,24 @@ An object that allows configuring Scanpy. Some selected settings are discussed in the following. +Presets allow to set the behavior of many scanpy functions at once. + +```{eval-rst} +.. autosummary:: + :signatures: none + + Preset + settings.preset +``` + Influence the global behavior of plotting functions. 
In non-interactive scripts, you'd usually want to set `settings.autoshow` to `False`. -% no :toctree: here because they are linked under the class - ```{eval-rst} .. autosummary:: - :nosignatures: - ~settings.autoshow - ~settings.autosave + settings.autoshow + settings.autosave ``` IO related settings for saving figures, caching files and storing datasets. @@ -48,13 +65,12 @@ IO related settings for saving figures, caching files and storing datasets. ```{eval-rst} .. autosummary:: - :nosignatures: - ~settings.figdir - ~settings.cachedir - ~settings.datasetdir - ~settings.file_format_figs - ~settings.file_format_data + settings.figdir + settings.cachedir + settings.datasetdir + settings.file_format_figs + settings.file_format_data ``` The verbosity of logging output, where verbosity levels have the following @@ -63,19 +79,18 @@ details, etc. ```{eval-rst} .. autosummary:: - :nosignatures: + :signatures: none - ~settings.verbosity + Verbosity + settings.verbosity ``` Print versions of packages that might influence numerical results. ```{eval-rst} .. 
autosummary:: - :nosignatures: + :signatures: none :toctree: ../generated/ logging.print_header - logging.print_versions - ``` diff --git a/docs/conf.py b/docs/conf.py index 9809dba5c0..62b95bdee2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -91,7 +91,12 @@ # Generate the API documentation when building autosummary_generate = True autodoc_member_order = "bysource" -# autodoc_default_flags = ['members'] +autodoc_default_options = { + # Don’t show members in addition to the autosummary table added by `_templates/class.rst` + "members": False, + # show “Bases: SomeClass” at the top of class docs + "show-inheritance": True, +} napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_include_init_with_doc = False diff --git a/pyproject.toml b/pyproject.toml index 2558eabdf0..b863ece5da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,7 @@ test = [ "scanpy[dask-ml]", ] doc = [ - "sphinx >=7, !=8.2.0", + "sphinx>=8.2.3", "sphinx-book-theme>=1.1.0", "scanpydoc>=0.15.3", "sphinx-autodoc-typehints>=1.25.2", diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index f772ceeec4..ad5021a4f6 100644 --- a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os import sys from packaging.version import Version @@ -16,7 +15,7 @@ # the actual API # (start with settings as several tools are using it) -from ._settings import Verbosity, settings +from ._settings import Preset, Verbosity, settings set_figure_params = settings.set_figure_params @@ -63,6 +62,7 @@ __all__ = [ "AnnData", "Neighbors", + "Preset", "Verbosity", "__version__", "concat", diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 3073ca7234..8cb66b0e9c 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -19,7 +19,7 @@ from collections.abc import Generator, Iterable from typing import Any, ClassVar, Self, TextIO - from .preprocessing import _highly_variable_genes as hvg + from ._types 
import HVGFlavor # Collected from the print_* functions in matplotlib.backends _Format = ( @@ -35,11 +35,20 @@ class Preset(StrEnum): + """Presets for :func:`scanpy.settings.preset`. + + See properties below for details. + """ + ScanpyV1 = auto() + """Scanpy 1.*’s default settings.""" + SeuratV5 = auto() + """Try to match Seurat 5.* as closely as possible.""" @cached_property - def highly_variable_genes(self) -> hvg.Flavor: + def highly_variable_genes(self) -> HVGFlavor: + """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" match self: case Preset.ScanpyV1: return "seurat" @@ -430,8 +439,8 @@ def set_figure_params( # noqa: PLR0913 dpi Resolution of rendered figures – this influences the size of figures in notebooks. dpi_save - Resolution of saved figures. This should typically be higher to achieve - publication quality. + Resolution of saved figures. + This should typically be higher to achieve publication quality. frameon Add frames and axes labels to scatter plots. vector_friendly @@ -439,7 +448,7 @@ def set_figure_params( # noqa: PLR0913 fontsize Set the fontsize for several `rcParams` entries. Ignored if `scanpy=False`. figsize - Set plt.rcParams['figure.figsize']. + Set `rcParams['figure.figsize']`. color_map Convenience method for setting the default color map. Ignored if `scanpy=False`. format @@ -448,8 +457,8 @@ def set_figure_params( # noqa: PLR0913 Sets backgrounds via `rcParams['figure.facecolor'] = facecolor` and `rcParams['axes.facecolor'] = facecolor`. transparent - Save figures with transparent back ground. Sets - `rcParams['savefig.transparent']`. + Save figures with transparent background. + Sets `rcParams['savefig.transparent']`. 
ipython_format Only concerns the notebook/IPython environment; see `matplotlib_inline.backend_inline.set_matplotlib_formats diff --git a/src/scanpy/_types.py b/src/scanpy/_types.py new file mode 100644 index 0000000000..af2d430fe4 --- /dev/null +++ b/src/scanpy/_types.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from typing import Literal + +__all__ = ["HVGFlavor"] + +HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 7c7c7224ef..eac6915441 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -3,7 +3,7 @@ import warnings from dataclasses import dataclass from inspect import signature -from typing import TYPE_CHECKING, Literal, cast +from typing import TYPE_CHECKING, cast import numba import numpy as np @@ -20,10 +20,11 @@ from ._simple import filter_genes if TYPE_CHECKING: - from numpy.typing import NDArray + from typing import Literal + from numpy.typing import NDArray -Flavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] + from .._types import HVGFlavor def _highly_variable_genes_seurat_v3( # noqa: PLR0912, PLR0915 @@ -527,7 +528,7 @@ def highly_variable_genes( # noqa: PLR0913 max_mean: float = 3, span: float = 0.3, n_bins: int = 20, - flavor: Flavor | None = None, + flavor: HVGFlavor | None = None, subset: bool = False, inplace: bool = True, batch_key: str | None = None, From 513c417d50de196b639b29fa4a4405e1b4e85c2e Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Tue, 3 Jun 2025 15:48:48 +0200 Subject: [PATCH 05/42] just use `attr` link --- src/scanpy/preprocessing/_highly_variable_genes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index eac6915441..87afb306e5 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -597,7 +597,7 @@ def highly_variable_genes( # noqa: PLR0913 the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor - Choose the flavor for identifying highly variable genes (default depends on :preset:`highly_variable_genes`). + Choose the flavor for identifying highly variable genes (default depends on :attr:`~scanpy.Preset.highly_variable_genes`). For the dispersion based methods in their default workflows, `'seurat'` passes the cutoffs whereas `'cell_ranger'` passes `n_top_genes`. subset From b7d5a172516302479ac5f6046f5f494a0435bb26 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Tue, 3 Jun 2025 17:20:40 +0200 Subject: [PATCH 06/42] skip inherited members --- docs/extensions/autosummary_skip_inherited.py | 48 +++++++++++++++++++ src/scanpy/_settings.py | 25 ++++------ 2 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 docs/extensions/autosummary_skip_inherited.py diff --git a/docs/extensions/autosummary_skip_inherited.py b/docs/extensions/autosummary_skip_inherited.py new file mode 100644 index 0000000000..b299a4ecd6 --- /dev/null +++ b/docs/extensions/autosummary_skip_inherited.py @@ -0,0 +1,48 @@ +"""Extension to skip inherited methods and properties in autosummary.""" + +from __future__ import annotations + +from traceback import walk_stack +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Literal + + from sphinx.application import Sphinx + from sphinx.ext.autodoc import Options + + +def skip_inherited( # noqa: PLR0917 + app: Sphinx, + what: Literal["module", "class", "exception", "function", "method", "attribute"], + name: str, + obj: object, + skip: bool, # noqa: FBT001 + options: Options | dict[str, object], +) -> bool | None: + """Skip inherited members.""" + # Skip `getdoc` property + if what == "method" and name == "getdoc": + return True + + # find parent class + for frame, _ in walk_stack(None): + if frame.f_code.co_name == "_get_members" and frame.f_code.co_filename.endswith( + "/generate.py" + ): + parent = frame.f_locals["obj"] + if not isinstance(parent, type): + return None + break + else: + return None + + # skip if not a direct member of parent class + if name not in parent.__dict__: + return True + return None + + +def setup(app: Sphinx) -> None: + """App setup hook.""" + app.connect("autodoc-skip-member", skip_inherited) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 8cb66b0e9c..512fab11fc 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -70,10 +70,15 @@ class Verbosity(IntEnum): """Logging verbosity levels.""" error = 0 + 
"""Error (0)""" warning = 1 + """Warning (1)""" info = 2 + """Info (2)""" hint = 3 + """Hint (3)""" debug = 4 + """Debug (4)""" def __eq__(self, other: object) -> bool: if isinstance(other, Verbosity): @@ -133,7 +138,7 @@ class SettingsMeta(SingletonMeta): _cachedir: Path _datasetdir: Path _figdir: Path - _cache_compression: str | None + _cache_compression: Literal["lzf", "gzip", None] _max_memory: float _n_jobs: int _categories_to_ignore: list[str] @@ -161,14 +166,7 @@ def preset(cls, preset: Preset | str) -> None: @property def verbosity(cls) -> Verbosity: - """Verbosity level (default `warning`). - - Level 0: only show 'error' messages. - Level 1: also show 'warning' messages. - Level 2: also show 'info' messages. - Level 3: also show 'hint' messages. - Level 4: also show very detailed progress for 'debug'ging. - """ + """Verbosity level (default :attr:`Verbosity.warning`).""" return cls._verbosity @verbosity.setter @@ -300,15 +298,12 @@ def figdir(cls, figdir: Path | str) -> None: cls._figdir = Path(figdir) @property - def cache_compression(cls) -> str | None: - """Compression for `sc.read(..., cache=True)` (default `'lzf'`). - - May be `'lzf'`, `'gzip'`, or `None`. - """ + def cache_compression(cls) -> Literal["lzf", "gzip", None]: + """Compression for `sc.read(..., cache=True)` (default `'lzf'`).""" return cls._cache_compression @cache_compression.setter - def cache_compression(cls, cache_compression: str | None) -> None: + def cache_compression(cls, cache_compression: Literal["lzf", "gzip", None]) -> None: if cache_compression not in {"lzf", "gzip", None}: msg = ( f"`cache_compression` ({cache_compression}) " From 736c752824578f287000e8a55c37f1a7e23f983d Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Tue, 3 Jun 2025 18:24:34 +0200 Subject: [PATCH 07/42] skip inherited stuff --- docs/extensions/autosummary_skip_inherited.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/extensions/autosummary_skip_inherited.py b/docs/extensions/autosummary_skip_inherited.py index b299a4ecd6..376f4c4e79 100644 --- a/docs/extensions/autosummary_skip_inherited.py +++ b/docs/extensions/autosummary_skip_inherited.py @@ -14,7 +14,9 @@ def skip_inherited( # noqa: PLR0917 app: Sphinx, - what: Literal["module", "class", "exception", "function", "method", "attribute"], + what: Literal[ + "module", "class", "exception", "function", "method", "attribute", "property" + ], name: str, obj: object, skip: bool, # noqa: FBT001 @@ -37,10 +39,15 @@ def skip_inherited( # noqa: PLR0917 else: return None - # skip if not a direct member of parent class - if name not in parent.__dict__: - return True - return None + # return if it’s a member of the parent class + typ = parent + while typ is not type: + if name in typ.__dict__: + return None + typ = type(typ) + + # skip since we know it’s not a member of the parent class + return True def setup(app: Sphinx) -> None: From 368ea843222f79c07a07e2c9afedadb4669ce0c1 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 3 Jun 2025 18:54:43 +0200 Subject: [PATCH 08/42] fix organization --- docs/api/settings.md | 38 +++++++++++++------------------------- src/scanpy/_settings.py | 14 +++++++------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/docs/api/settings.md b/docs/api/settings.md index 08f4d35b4c..038ea7c500 100644 --- a/docs/api/settings.md +++ b/docs/api/settings.md @@ -5,16 +5,6 @@ ```{eval-rst} .. currentmodule:: scanpy - -.. - - This is a comment, yet results in autosummary generating stubs - - .. 
autosummary:: - :toctree: ../generated/ - - Preset - Verbosity ``` A convenience function for setting some default {obj}`matplotlib.rcParams` and a @@ -40,18 +30,28 @@ An object that allows configuring Scanpy. Some selected settings are discussed in the following. -Presets allow to set the behavior of many scanpy functions at once. +Presets allow to set the behavior of many scanpy functions at once: ```{eval-rst} .. autosummary:: :signatures: none + :toctree: ../generated/ Preset - settings.preset +``` + +Verbosity controls the amount of logging output: + +```{eval-rst} +.. autosummary:: + :signatures: none + :toctree: ../generated/ + + Verbosity ``` Influence the global behavior of plotting functions. In non-interactive scripts, -you'd usually want to set `settings.autoshow` to `False`. +you'd usually want to set {attr}`settings.autoshow` to `False`. ```{eval-rst} .. autosummary:: @@ -73,18 +73,6 @@ IO related settings for saving figures, caching files and storing datasets. settings.file_format_data ``` -The verbosity of logging output, where verbosity levels have the following -meaning: 0='error', 1='warning', 2='info', 3='hint', 4=more details, 5=even more -details, etc. - -```{eval-rst} -.. autosummary:: - :signatures: none - - Verbosity - settings.verbosity -``` - Print versions of packages that might influence numerical results. ```{eval-rst} diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 512fab11fc..05e9229414 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -35,7 +35,7 @@ class Preset(StrEnum): - """Presets for :func:`scanpy.settings.preset`. + """Presets for :attr:`scanpy.settings.preset`. See properties below for details. 
""" @@ -67,18 +67,18 @@ def highly_variable_genes(self) -> HVGFlavor: class Verbosity(IntEnum): - """Logging verbosity levels.""" + """Logging verbosity levels for :attr:`scanpy.settings.verbosity`.""" error = 0 - """Error (0)""" + """Error (`0`)""" warning = 1 - """Warning (1)""" + """Warning (`1`)""" info = 2 - """Info (2)""" + """Info (`2`)""" hint = 3 - """Hint (3)""" + """Hint (`3`)""" debug = 4 - """Debug (4)""" + """Debug (`4`)""" def __eq__(self, other: object) -> bool: if isinstance(other, Verbosity): From 5c260d4263136c20dd632a4fa58d5cf7a529622e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 09:36:00 +0200 Subject: [PATCH 09/42] fix verbosity documentation --- src/scanpy/_settings.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 05e9229414..324c6905d5 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -3,7 +3,7 @@ import inspect import sys from contextlib import contextmanager -from enum import IntEnum, StrEnum, auto +from enum import EnumMeta, IntEnum, StrEnum, auto from functools import cached_property from logging import getLevelNamesMapping from pathlib import Path @@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Literal, get_args from . 
import logging -from ._compat import old_positionals +from ._compat import deprecated, old_positionals from ._singleton import SingletonMeta from .logging import _RootLogger, _set_log_file, _set_log_level @@ -66,7 +66,14 @@ def highly_variable_genes(self) -> HVGFlavor: _VERBOSITY_TO_LOGLEVEL.update(dict(enumerate(list(_VERBOSITY_TO_LOGLEVEL.values())))) -class Verbosity(IntEnum): +class VerbosityMeta(EnumMeta): + @property + @deprecated("Use `Verbosity.warning` instead") + def warn(cls) -> Verbosity: + return Verbosity.warning + + +class Verbosity(IntEnum, metaclass=VerbosityMeta): """Logging verbosity levels for :attr:`scanpy.settings.verbosity`.""" error = 0 @@ -91,6 +98,7 @@ def __eq__(self, other: object) -> bool: @property def level(self) -> int: + """The :ref:`logging level ` corresponding to this verbosity level.""" m = getLevelNamesMapping() return m[_VERBOSITY_TO_LOGLEVEL[self.name]] @@ -104,10 +112,6 @@ def override( settings.verbosity = self -# backwards compat -Verbosity.warn = Verbosity.warning - - def _type_check(var: Any, varname: str, types: type | tuple[type, ...]) -> None: if isinstance(var, types): return From b630891d6071ac8a8e215683944eda890134ae61 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 09:48:44 +0200 Subject: [PATCH 10/42] more docs --- src/scanpy/_settings.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 324c6905d5..098530222c 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -106,7 +106,17 @@ def level(self) -> int: def override( self, verbosity: Verbosity | _VerbosityName | int ) -> Generator[Verbosity, None, None]: - """Temporarily override verbosity.""" + """Temporarily override verbosity. + + >>> import scanpy as sc + >>> sc.settings.verbosity + + >>> with sc.settings.verbosity.override(settings.verbosity.debug): + ... 
sc.settings.verbosity + + >>> sc.settings.verbosity + + """ settings.verbosity = verbosity yield self settings.verbosity = self From 5028415154048b546b624563c0b1903e104aa085 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 09:55:41 +0200 Subject: [PATCH 11/42] more verbosity docs --- docs/release-notes/1.10.0.md | 2 +- src/scanpy/_settings.py | 5 ++--- src/scanpy/external/tl/_phate.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/release-notes/1.10.0.md b/docs/release-notes/1.10.0.md index 2e23d24426..969633db0b 100644 --- a/docs/release-notes/1.10.0.md +++ b/docs/release-notes/1.10.0.md @@ -47,7 +47,7 @@ Some highlights: * Updated {func}`~scanpy.read_visium` such that it can read spaceranger 2.0 files {smaller}`L Lehner` * Fix {func}`~scanpy.pp.normalize_total` for dask {pr}`2466` {smaller}`P Angerer` -* Fix setting `sc.settings.verbosity` in some cases {pr}`2605` {smaller}`P Angerer` +* Fix setting :attr:`scanpy.settings.verbosity` in some cases {pr}`2605` {smaller}`P Angerer` * Fix all remaining pandas warnings {pr}`2789` {smaller}`P Angerer` * Fix some annoying plotting warnings around violin plots {pr}`2844` {smaller}`P Angerer` * Scanpy now has a test job which tests against the minumum versions of the dependencies. In the process of implementing this, many bugs associated with using older versions of `pandas`, `anndata`, `numpy`, and `matplotlib` were fixed. {pr}`2816` {smaller}`I Virshup` diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 098530222c..adf6ad5577 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -109,13 +109,12 @@ def override( """Temporarily override verbosity. >>> import scanpy as sc - >>> sc.settings.verbosity - + >>> sc.settings.verbosity = sc.Verbosity.info >>> with sc.settings.verbosity.override(settings.verbosity.debug): ... 
sc.settings.verbosity >>> sc.settings.verbosity - + """ settings.verbosity = verbosity yield self diff --git a/src/scanpy/external/tl/_phate.py b/src/scanpy/external/tl/_phate.py index 19f7df17c7..efe4fc6dc7 100644 --- a/src/scanpy/external/tl/_phate.py +++ b/src/scanpy/external/tl/_phate.py @@ -111,8 +111,8 @@ def phate( # noqa: PLR0913 random_state Random seed. Defaults to the global `numpy` random number generator verbose - If `True` or an `int`/`Verbosity` ≥ 2/`hint`, print status messages. - If `None`, `sc.settings.verbosity` is used. + If `True` or an :class:`int`/:class:`~scanpy.Verbosity` ≥ 2/:attr:`~scanpy.Verbosity.hint`, print status messages. + If `None`, :attr:`scanpy.settings.verbosity` is used. copy Return a copy instead of writing to `adata`. kwargs From dcd783afad9335c2269aee32a2eadc08bf31f76a Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 10:06:00 +0200 Subject: [PATCH 12/42] document all intended settings attrs --- .../extensions/autosummary_skip_deprecated.py | 32 +++++++++++++++++++ docs/release-notes/1.0.0.md | 2 +- src/scanpy/__init__.py | 2 +- src/scanpy/_settings.py | 21 +++++++++--- src/scanpy/plotting/_rcmod.py | 2 +- 5 files changed, 52 insertions(+), 7 deletions(-) create mode 100644 docs/extensions/autosummary_skip_deprecated.py diff --git a/docs/extensions/autosummary_skip_deprecated.py b/docs/extensions/autosummary_skip_deprecated.py new file mode 100644 index 0000000000..755ffc08a6 --- /dev/null +++ b/docs/extensions/autosummary_skip_deprecated.py @@ -0,0 +1,32 @@ +"""Extension to skip deprecated methods and properties in autosummary.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Literal + + from sphinx.application import Sphinx + from sphinx.ext.autodoc import Options + + +def skip_deprecated( # noqa: PLR0917 + app: Sphinx, + what: Literal[ + "module", "class", "exception", "function", "method", "attribute", "property" + ], + name: str, + 
obj: object, + skip: bool, # noqa: FBT001 + options: Options | dict[str, object], +) -> bool | None: + """Skip deprecated members.""" + if hasattr(obj, "__deprecated__"): + return True + return None + + +def setup(app: Sphinx) -> None: + """App setup hook.""" + app.connect("autodoc-skip-member", skip_deprecated) diff --git a/docs/release-notes/1.0.0.md b/docs/release-notes/1.0.0.md index 00fa8b43db..2a492ad92a 100644 --- a/docs/release-notes/1.0.0.md +++ b/docs/release-notes/1.0.0.md @@ -50,7 +50,7 @@ - {func}`~scanpy.tl.louvain` provides a better implementation for reclustering via `restrict_to` {smaller}`A Wolf` - scanpy no longer modifies rcParams upon import, call - `settings.set_figure_params` to set the 'scanpy style' {smaller}`A Wolf` + :func:`scanpy.set_figure_params` to set the 'scanpy style' {smaller}`A Wolf` - default cache directory is `./cache/`, set `settings.cachedir` to change this; nested directories in this are avoided {smaller}`A Wolf` - show edges in scatter plots based on graph visualization diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index ad5021a4f6..1480d7e01e 100644 --- a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -17,7 +17,7 @@ from ._settings import Preset, Verbosity, settings -set_figure_params = settings.set_figure_params +set_figure_params = settings._set_figure_params import anndata diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index adf6ad5577..8aab829a66 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -140,8 +140,7 @@ class SettingsMeta(SingletonMeta): _logfile: TextIO | None _verbosity: Verbosity # rest - N_PCS: int - """Default number of principal components to use.""" + _n_pcs: int _plot_suffix: str _file_format_data: AnnDataFileFormat _file_format_figs: str @@ -204,6 +203,16 @@ def verbosity(cls, verbosity: Verbosity | _VerbosityName | int) -> None: _type_check(verbosity, "verbosity", (str, int)) _set_log_level(cls, _VERBOSITY_TO_LOGLEVEL[cls._verbosity.name]) + 
@property + def N_PCS(cls) -> int: + """Default number of principal components to use.""" + return cls._n_pcs + + @N_PCS.setter + def N_PCS(cls, n_pcs: int) -> None: + _type_check(n_pcs, "n_pcs", int) + cls._n_pcs = n_pcs + @property def plot_suffix(cls) -> str: """Global suffix that is appended to figure filenames.""" @@ -408,6 +417,10 @@ def categories_to_ignore(cls, categories_to_ignore: Iterable[str]) -> None: # Functions # -------------------------------------------------------------------------------- + @deprecated("Use `scanpy.set_figure_params` instead") + def set_figure_params(cls, *args, **kwargs) -> None: + cls.set_figure_params(*args, **kwargs) + @old_positionals( "scanpy", "dpi", @@ -422,7 +435,7 @@ def categories_to_ignore(cls, categories_to_ignore: Iterable[str]) -> None: "transparent", "ipython_format", ) - def set_figure_params( # noqa: PLR0913 + def _set_figure_params( # noqa: PLR0913 cls, *, scanpy: bool = True, @@ -532,7 +545,7 @@ def __new__(cls) -> type[Self]: _logpath: ClassVar = None _verbosity: ClassVar = Verbosity.warning # rest - N_PCS: ClassVar = 50 + _n_pcs: ClassVar = 50 _plot_suffix: ClassVar = "" _file_format_data: ClassVar = "h5ad" _file_format_figs: ClassVar = "pdf" diff --git a/src/scanpy/plotting/_rcmod.py b/src/scanpy/plotting/_rcmod.py index 4936741783..344907de28 100644 --- a/src/scanpy/plotting/_rcmod.py +++ b/src/scanpy/plotting/_rcmod.py @@ -12,7 +12,7 @@ def set_rcParams_scanpy(fontsize=14, color_map=None): """Set matplotlib.rcParams to Scanpy defaults. - Call this through `settings.set_figure_params`. + Call this through :func:`scanpy.set_figure_params`. """ # figure rcParams["figure.figsize"] = (4, 4) From 8a626c28da90e164847c509a1bc14cd495397c30 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 6 Jun 2025 10:25:11 +0200 Subject: [PATCH 13/42] more links --- src/scanpy/_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 8aab829a66..6a3a4d0e0e 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -244,7 +244,7 @@ def file_format_figs(cls) -> str: """File format for saving figures. For example `'png'`, `'pdf'` or `'svg'`. Many other formats work as well (see - `matplotlib.pyplot.savefig`). + :func:`matplotlib.pyplot.savefig`). """ return cls._file_format_figs From c9a06dd3108fc543e04e9fcacd3b5987c4c1a6ec Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 11:35:35 +0200 Subject: [PATCH 14/42] document defaults --- src/scanpy/_settings.py | 64 ++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 6a3a4d0e0e..9e52f146cb 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -1,14 +1,15 @@ from __future__ import annotations import inspect +import re import sys from contextlib import contextmanager from enum import EnumMeta, IntEnum, StrEnum, auto -from functools import cached_property +from functools import cached_property, partial, wraps from logging import getLevelNamesMapping from pathlib import Path from time import time -from typing import TYPE_CHECKING, Literal, get_args +from typing import TYPE_CHECKING, Literal, LiteralString, TypeVar, get_args from . 
import logging from ._compat import deprecated, old_positionals @@ -16,7 +17,7 @@ from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: - from collections.abc import Generator, Iterable + from collections.abc import Callable, Generator, Iterable, Mapping from typing import Any, ClassVar, Self, TextIO from ._types import HVGFlavor @@ -30,10 +31,50 @@ _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] _LoggingLevelName = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "HINT", "DEBUG"] +T = TypeVar("T", bound=LiteralString) + AnnDataFileFormat = Literal["h5ad", "zarr"] +_preset_postprocessors: list[Callable[[], None]] = [] + + +def _postprocess_preset_prop( + prop: cached_property[T], + param: str, + get_map: Callable[[], Mapping[Preset, LiteralString]], +) -> None: + map = get_map() + + map_type = inspect.signature(get_map).return_annotation + value_type = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type)[1] + + added_doc = "\n".join( + f":attr:`{k.name}`\n Default: `{param}={v!r}`" for k, v in map.items() + ) + + prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" + prop.func.__annotations__["return"] = value_type + + +def _preset_property( + param: str, +) -> Callable[[Callable[[], Mapping[Preset, T]]], cached_property[T]]: + def decorator(get_map: Callable[[], Mapping[Preset, T]]) -> cached_property[T]: + @wraps(get_map) + def get(self: Preset) -> T: + return get_map()[self] + + prop = cached_property(get) + _preset_postprocessors.append( + partial(_postprocess_preset_prop, prop, param, get_map) + ) + return prop + + return decorator + + class Preset(StrEnum): """Presets for :attr:`scanpy.settings.preset`. 
@@ -46,14 +87,17 @@ class Preset(StrEnum): SeuratV5 = auto() """Try to match Seurat 5.* as closely as possible.""" - @cached_property - def highly_variable_genes(self) -> HVGFlavor: + @_preset_property("flavor") + def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" - match self: - case Preset.ScanpyV1: - return "seurat" - case Preset.SeuratV5: - return "seurat_v3" + return { + Preset.ScanpyV1: "seurat", + Preset.SeuratV5: "seurat_v3", + } + + +for _postprocess in _preset_postprocessors: + _postprocess() _VERBOSITY_TO_LOGLEVEL: dict[int | _VerbosityName, _LoggingLevelName] = { From 3bae7ae591f954a5401e057be6cd754d71203f91 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 11:40:08 +0200 Subject: [PATCH 15/42] improve other side --- src/scanpy/preprocessing/_highly_variable_genes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 87afb306e5..1e5444cbba 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -597,7 +597,8 @@ def highly_variable_genes( # noqa: PLR0913 the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor - Choose the flavor for identifying highly variable genes (default depends on :attr:`~scanpy.Preset.highly_variable_genes`). + Choose the flavor for identifying highly variable genes + (default depends on :attr:`scanpy.settings.preset` property :attr:`~scanpy.Preset.highly_variable_genes`). For the dispersion based methods in their default workflows, `'seurat'` passes the cutoffs whereas `'cell_ranger'` passes `n_top_genes`. subset From b15854518095a01ab1daf2a9225a2521a8645f11 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 6 Jun 2025 12:04:27 +0200 Subject: [PATCH 16/42] undo presets --- docs/api/settings.md | 10 --- src/scanpy/__init__.py | 3 +- src/scanpy/_settings.py | 82 +------------------ .../preprocessing/_highly_variable_genes.py | 14 +--- 4 files changed, 7 insertions(+), 102 deletions(-) diff --git a/docs/api/settings.md b/docs/api/settings.md index 038ea7c500..acec01851a 100644 --- a/docs/api/settings.md +++ b/docs/api/settings.md @@ -30,16 +30,6 @@ An object that allows configuring Scanpy. Some selected settings are discussed in the following. -Presets allow to set the behavior of many scanpy functions at once: - -```{eval-rst} -.. autosummary:: - :signatures: none - :toctree: ../generated/ - - Preset -``` - Verbosity controls the amount of logging output: ```{eval-rst} diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index 1480d7e01e..47bffe370b 100644 --- a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -15,7 +15,7 @@ # the actual API # (start with settings as several tools are using it) -from ._settings import Preset, Verbosity, settings +from ._settings import Verbosity, settings set_figure_params = settings._set_figure_params @@ -62,7 +62,6 @@ __all__ = [ "AnnData", "Neighbors", - "Preset", "Verbosity", "__version__", "concat", diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 9e52f146cb..c3701991d5 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -1,11 +1,9 @@ from __future__ import annotations import inspect -import re import sys from contextlib import contextmanager -from enum import EnumMeta, IntEnum, StrEnum, auto -from functools import cached_property, partial, wraps +from enum import EnumMeta, IntEnum from logging import getLevelNamesMapping from pathlib import Path from time import time @@ -17,11 +15,9 @@ from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: - from collections.abc import Callable, Generator, Iterable, Mapping + from collections.abc import 
Generator, Iterable from typing import Any, ClassVar, Self, TextIO - from ._types import HVGFlavor - # Collected from the print_* functions in matplotlib.backends _Format = ( Literal["png", "jpg", "tif", "tiff"] # noqa: PYI030 @@ -37,69 +33,6 @@ AnnDataFileFormat = Literal["h5ad", "zarr"] -_preset_postprocessors: list[Callable[[], None]] = [] - - -def _postprocess_preset_prop( - prop: cached_property[T], - param: str, - get_map: Callable[[], Mapping[Preset, LiteralString]], -) -> None: - map = get_map() - - map_type = inspect.signature(get_map).return_annotation - value_type = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type)[1] - - added_doc = "\n".join( - f":attr:`{k.name}`\n Default: `{param}={v!r}`" for k, v in map.items() - ) - - prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" - prop.func.__annotations__["return"] = value_type - - -def _preset_property( - param: str, -) -> Callable[[Callable[[], Mapping[Preset, T]]], cached_property[T]]: - def decorator(get_map: Callable[[], Mapping[Preset, T]]) -> cached_property[T]: - @wraps(get_map) - def get(self: Preset) -> T: - return get_map()[self] - - prop = cached_property(get) - _preset_postprocessors.append( - partial(_postprocess_preset_prop, prop, param, get_map) - ) - return prop - - return decorator - - -class Preset(StrEnum): - """Presets for :attr:`scanpy.settings.preset`. - - See properties below for details. 
- """ - - ScanpyV1 = auto() - """Scanpy 1.*’s default settings.""" - - SeuratV5 = auto() - """Try to match Seurat 5.* as closely as possible.""" - - @_preset_property("flavor") - def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: - """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" - return { - Preset.ScanpyV1: "seurat", - Preset.SeuratV5: "seurat_v3", - } - - -for _postprocess in _preset_postprocessors: - _postprocess() - - _VERBOSITY_TO_LOGLEVEL: dict[int | _VerbosityName, _LoggingLevelName] = { "error": "ERROR", "warning": "WARNING", @@ -178,7 +111,6 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]) -> None: class SettingsMeta(SingletonMeta): - _preset: Preset # logging _root_logger: _RootLogger _logfile: TextIO | None @@ -211,15 +143,6 @@ class SettingsMeta(SingletonMeta): _previous_memory_usage: int """Stores the previous memory usage.""" - @property - def preset(cls) -> Preset: - """Preset to use.""" - return cls._preset - - @preset.setter - def preset(cls, preset: Preset | str) -> None: - cls._preset = Preset(preset) - @property def verbosity(cls) -> Verbosity: """Verbosity level (default :attr:`Verbosity.warning`).""" @@ -582,7 +505,6 @@ class settings(metaclass=SettingsMeta): def __new__(cls) -> type[Self]: return cls - _preset = Preset.ScanpyV1 # logging _root_logger: ClassVar = _RootLogger(logging.INFO) _logfile: ClassVar = None diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 1e5444cbba..0382b5fe9f 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -528,7 +528,7 @@ def highly_variable_genes( # noqa: PLR0913 max_mean: float = 3, span: float = 0.3, n_bins: int = 20, - flavor: HVGFlavor | None = None, + flavor: HVGFlavor = "seurat", subset: bool = False, inplace: bool = True, batch_key: str | None = None, @@ -597,10 +597,9 @@ def highly_variable_genes( # noqa: PLR0913 the 
normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor - Choose the flavor for identifying highly variable genes - (default depends on :attr:`scanpy.settings.preset` property :attr:`~scanpy.Preset.highly_variable_genes`). - For the dispersion based methods in their default workflows, - `'seurat'` passes the cutoffs whereas `'cell_ranger'` passes `n_top_genes`. + Choose the flavor for identifying highly variable genes. For the dispersion + based methods in their default workflows, Seurat passes the cutoffs whereas + Cell Ranger passes `n_top_genes`. subset Inplace subset to highly-variable genes if `True` otherwise merely indicate highly variable genes. @@ -647,11 +646,6 @@ def highly_variable_genes( # noqa: PLR0913 This function replaces :func:`~scanpy.pp.filter_genes_dispersion`. """ - if flavor is None: - from .. import settings - - flavor = settings.preset.highly_variable_genes - start = logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): From 26a4ec944b7ec2d3b81cff20186b2d6af4780e99 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 12:06:31 +0200 Subject: [PATCH 17/42] Revert "undo presets" This reverts commit b15854518095a01ab1daf2a9225a2521a8645f11. --- docs/api/settings.md | 10 +++ src/scanpy/__init__.py | 3 +- src/scanpy/_settings.py | 82 ++++++++++++++++++- .../preprocessing/_highly_variable_genes.py | 14 +++- 4 files changed, 102 insertions(+), 7 deletions(-) diff --git a/docs/api/settings.md b/docs/api/settings.md index acec01851a..038ea7c500 100644 --- a/docs/api/settings.md +++ b/docs/api/settings.md @@ -30,6 +30,16 @@ An object that allows configuring Scanpy. Some selected settings are discussed in the following. +Presets allow to set the behavior of many scanpy functions at once: + +```{eval-rst} +.. 
autosummary:: + :signatures: none + :toctree: ../generated/ + + Preset +``` + Verbosity controls the amount of logging output: ```{eval-rst} diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index 47bffe370b..1480d7e01e 100644 --- a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -15,7 +15,7 @@ # the actual API # (start with settings as several tools are using it) -from ._settings import Verbosity, settings +from ._settings import Preset, Verbosity, settings set_figure_params = settings._set_figure_params @@ -62,6 +62,7 @@ __all__ = [ "AnnData", "Neighbors", + "Preset", "Verbosity", "__version__", "concat", diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index c3701991d5..9e52f146cb 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -1,9 +1,11 @@ from __future__ import annotations import inspect +import re import sys from contextlib import contextmanager -from enum import EnumMeta, IntEnum +from enum import EnumMeta, IntEnum, StrEnum, auto +from functools import cached_property, partial, wraps from logging import getLevelNamesMapping from pathlib import Path from time import time @@ -15,9 +17,11 @@ from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: - from collections.abc import Generator, Iterable + from collections.abc import Callable, Generator, Iterable, Mapping from typing import Any, ClassVar, Self, TextIO + from ._types import HVGFlavor + # Collected from the print_* functions in matplotlib.backends _Format = ( Literal["png", "jpg", "tif", "tiff"] # noqa: PYI030 @@ -33,6 +37,69 @@ AnnDataFileFormat = Literal["h5ad", "zarr"] +_preset_postprocessors: list[Callable[[], None]] = [] + + +def _postprocess_preset_prop( + prop: cached_property[T], + param: str, + get_map: Callable[[], Mapping[Preset, LiteralString]], +) -> None: + map = get_map() + + map_type = inspect.signature(get_map).return_annotation + value_type = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type)[1] + + 
added_doc = "\n".join( + f":attr:`{k.name}`\n Default: `{param}={v!r}`" for k, v in map.items() + ) + + prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" + prop.func.__annotations__["return"] = value_type + + +def _preset_property( + param: str, +) -> Callable[[Callable[[], Mapping[Preset, T]]], cached_property[T]]: + def decorator(get_map: Callable[[], Mapping[Preset, T]]) -> cached_property[T]: + @wraps(get_map) + def get(self: Preset) -> T: + return get_map()[self] + + prop = cached_property(get) + _preset_postprocessors.append( + partial(_postprocess_preset_prop, prop, param, get_map) + ) + return prop + + return decorator + + +class Preset(StrEnum): + """Presets for :attr:`scanpy.settings.preset`. + + See properties below for details. + """ + + ScanpyV1 = auto() + """Scanpy 1.*’s default settings.""" + + SeuratV5 = auto() + """Try to match Seurat 5.* as closely as possible.""" + + @_preset_property("flavor") + def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: + """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" + return { + Preset.ScanpyV1: "seurat", + Preset.SeuratV5: "seurat_v3", + } + + +for _postprocess in _preset_postprocessors: + _postprocess() + + _VERBOSITY_TO_LOGLEVEL: dict[int | _VerbosityName, _LoggingLevelName] = { "error": "ERROR", "warning": "WARNING", @@ -111,6 +178,7 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]) -> None: class SettingsMeta(SingletonMeta): + _preset: Preset # logging _root_logger: _RootLogger _logfile: TextIO | None @@ -143,6 +211,15 @@ class SettingsMeta(SingletonMeta): _previous_memory_usage: int """Stores the previous memory usage.""" + @property + def preset(cls) -> Preset: + """Preset to use.""" + return cls._preset + + @preset.setter + def preset(cls, preset: Preset | str) -> None: + cls._preset = Preset(preset) + @property def verbosity(cls) -> Verbosity: """Verbosity level (default :attr:`Verbosity.warning`).""" @@ -505,6 +582,7 @@ class settings(metaclass=SettingsMeta): def 
__new__(cls) -> type[Self]: return cls + _preset = Preset.ScanpyV1 # logging _root_logger: ClassVar = _RootLogger(logging.INFO) _logfile: ClassVar = None diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 0382b5fe9f..1e5444cbba 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -528,7 +528,7 @@ def highly_variable_genes( # noqa: PLR0913 max_mean: float = 3, span: float = 0.3, n_bins: int = 20, - flavor: HVGFlavor = "seurat", + flavor: HVGFlavor | None = None, subset: bool = False, inplace: bool = True, batch_key: str | None = None, @@ -597,9 +597,10 @@ def highly_variable_genes( # noqa: PLR0913 the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor - Choose the flavor for identifying highly variable genes. For the dispersion - based methods in their default workflows, Seurat passes the cutoffs whereas - Cell Ranger passes `n_top_genes`. + Choose the flavor for identifying highly variable genes + (default depends on :attr:`scanpy.settings.preset` property :attr:`~scanpy.Preset.highly_variable_genes`). + For the dispersion based methods in their default workflows, + `'seurat'` passes the cutoffs whereas `'cell_ranger'` passes `n_top_genes`. subset Inplace subset to highly-variable genes if `True` otherwise merely indicate highly variable genes. @@ -646,6 +647,11 @@ def highly_variable_genes( # noqa: PLR0913 This function replaces :func:`~scanpy.pp.filter_genes_dispersion`. """ + if flavor is None: + from .. import settings + + flavor = settings.preset.highly_variable_genes + start = logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): From 28a5ea3ac641ad52e936903df22462bf1ecf5fbf Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 6 Jun 2025 13:08:23 +0200 Subject: [PATCH 18/42] fix broken test --- tests/test_readwrite.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index aff26b79d3..fcaedf13fd 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -78,9 +78,12 @@ def test_write( assert sc.settings.file_format_data == ff return # return early case "default", _: - monkeypatch.setattr(sc.settings, "file_format_data", ext) - with ctx: - sc.write("test", adata) + sc.settings.file_format_data, old = ext, sc.settings.file_format_data + try: + with ctx: + sc.write("test", adata) + finally: + sc.settings.file_format_data = old d = sc.settings.writedir case _: pytest.fail("add branch for new style") From 8335ff93b7026aa51149d5bf2bc08484534ff222 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 13:28:38 +0200 Subject: [PATCH 19/42] make into decorator --- src/scanpy/_settings.py | 65 ++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index c3701991d5..bee7e757a9 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -4,10 +4,11 @@ import sys from contextlib import contextmanager from enum import EnumMeta, IntEnum +from functools import wraps from logging import getLevelNamesMapping from pathlib import Path from time import time -from typing import TYPE_CHECKING, Literal, LiteralString, TypeVar, get_args +from typing import TYPE_CHECKING, Literal, LiteralString, ParamSpec, TypeVar, get_args from . 
import logging from ._compat import deprecated, old_positionals @@ -15,8 +16,9 @@ from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: - from collections.abc import Generator, Iterable - from typing import Any, ClassVar, Self, TextIO + from collections.abc import Callable, Generator, Iterable + from types import UnionType + from typing import ClassVar, Concatenate, Self, TextIO # Collected from the print_* functions in matplotlib.backends _Format = ( @@ -27,7 +29,11 @@ _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] _LoggingLevelName = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "HINT", "DEBUG"] -T = TypeVar("T", bound=LiteralString) +L = TypeVar("L", bound=LiteralString) +S = TypeVar("S") +T = TypeVar("T") +P = ParamSpec("P") +R = TypeVar("R") AnnDataFileFormat = Literal["h5ad", "zarr"] @@ -98,18 +104,35 @@ def override( settings.verbosity = self -def _type_check(var: Any, varname: str, types: type | tuple[type, ...]) -> None: +def _type_check(var: object, name: str, types: type | UnionType) -> None: if isinstance(var, types): return if isinstance(types, type): possible_types_str = types.__name__ else: - type_names = [t.__name__ for t in types] + type_names = [t.__name__ for t in get_args(types)] possible_types_str = f"{', '.join(type_names[:-1])} or {type_names[-1]}" - msg = f"{varname} must be of type {possible_types_str}" + msg = f"{name} must be of type {possible_types_str}" raise TypeError(msg) +def _type_check_arg2( + types: type | UnionType, +) -> Callable[[Callable[Concatenate[S, T, P], R]], Callable[Concatenate[S, T, P], R]]: + def decorator( + func: Callable[Concatenate[S, T, P], R], + ) -> Callable[Concatenate[S, T, P], R]: + @wraps(func) + def wrapped(self: S, var: T, *args: P.args, **kwargs: P.kwargs) -> R: + __tracebackhide__ = True + _type_check(var, func.__name__, types) + return func(self, var, *args, **kwargs) + + return wrapped + + return decorator + + class SettingsMeta(SingletonMeta): # 
logging _root_logger: _RootLogger @@ -167,7 +190,7 @@ def verbosity(cls, verbosity: Verbosity | _VerbosityName | int) -> None: raise ValueError(msg) cls._verbosity = Verbosity(verbosity_str_options.index(verbosity)) else: - _type_check(verbosity, "verbosity", (str, int)) + _type_check(verbosity, "verbosity", str | int) _set_log_level(cls, _VERBOSITY_TO_LOGLEVEL[cls._verbosity.name]) @property @@ -176,8 +199,8 @@ def N_PCS(cls) -> int: return cls._n_pcs @N_PCS.setter + @_type_check_arg2(int) def N_PCS(cls, n_pcs: int) -> None: - _type_check(n_pcs, "n_pcs", int) cls._n_pcs = n_pcs @property @@ -186,8 +209,8 @@ def plot_suffix(cls) -> str: return cls._plot_suffix @plot_suffix.setter + @_type_check_arg2(str) def plot_suffix(cls, plot_suffix: str) -> None: - _type_check(plot_suffix, "plot_suffix", str) cls._plot_suffix = plot_suffix @property @@ -196,8 +219,8 @@ def file_format_data(cls) -> AnnDataFileFormat: return cls._file_format_data @file_format_data.setter + @_type_check_arg2(str) def file_format_data(cls, file_format: AnnDataFileFormat) -> None: - _type_check(file_format, "file_format_data", str) if file_format not in (file_format_options := get_args(AnnDataFileFormat)): msg = ( f"Cannot set file_format_data to {file_format}. 
" @@ -216,8 +239,8 @@ def file_format_figs(cls) -> str: return cls._file_format_figs @file_format_figs.setter + @_type_check_arg2(str) def file_format_figs(self, figure_format: str) -> None: - _type_check(figure_format, "figure_format_data", str) self._file_format_figs = figure_format @property @@ -229,8 +252,8 @@ def autosave(cls) -> bool: return cls._autosave @autosave.setter + @_type_check_arg2(bool) def autosave(cls, autosave: bool) -> None: - _type_check(autosave, "autosave", bool) cls._autosave = autosave @property @@ -242,8 +265,8 @@ def autoshow(cls) -> bool: return cls._autoshow @autoshow.setter + @_type_check_arg2(bool) def autoshow(cls, autoshow: bool) -> None: - _type_check(autoshow, "autoshow", bool) cls._autoshow = autoshow @property @@ -252,8 +275,8 @@ def writedir(cls) -> Path: return cls._writedir @writedir.setter + @_type_check_arg2(Path | str) def writedir(cls, writedir: Path | str) -> None: - _type_check(writedir, "writedir", (str, Path)) cls._writedir = Path(writedir) @property @@ -262,8 +285,8 @@ def cachedir(cls) -> Path: return cls._cachedir @cachedir.setter + @_type_check_arg2(Path | str) def cachedir(cls, cachedir: Path | str) -> None: - _type_check(cachedir, "cachedir", (str, Path)) cls._cachedir = Path(cachedir) @property @@ -272,8 +295,8 @@ def datasetdir(cls) -> Path: return cls._datasetdir @datasetdir.setter + @_type_check_arg2(Path | str) def datasetdir(cls, datasetdir: Path | str) -> None: - _type_check(datasetdir, "datasetdir", (str, Path)) cls._datasetdir = Path(datasetdir).resolve() @property @@ -282,8 +305,8 @@ def figdir(cls) -> Path: return cls._figdir @figdir.setter + @_type_check_arg2(Path | str) def figdir(cls, figdir: Path | str) -> None: - _type_check(figdir, "figdir", (str, Path)) cls._figdir = Path(figdir) @property @@ -310,8 +333,8 @@ def max_memory(cls) -> int | float: return cls._max_memory @max_memory.setter + @_type_check_arg2(int | float) def max_memory(cls, max_memory: float) -> None: - _type_check(max_memory, 
"max_memory", (int, float)) cls._max_memory = max_memory @property @@ -325,8 +348,8 @@ def n_jobs(cls) -> int: return cls._n_jobs @n_jobs.setter + @_type_check_arg2(int) def n_jobs(cls, n_jobs: int) -> None: - _type_check(n_jobs, "n_jobs", int) cls._n_jobs = n_jobs @property @@ -335,8 +358,8 @@ def logpath(cls) -> Path | None: return cls._logpath @logpath.setter + @_type_check_arg2(Path | str) def logpath(cls, logpath: Path | str | None) -> None: - _type_check(logpath, "logfile", (str, Path)) if logpath is None: cls._logfile = None cls._logpath = None From 49421237c9069f4b6793e0d78e10f196e680a12e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 6 Jun 2025 14:31:52 +0200 Subject: [PATCH 20/42] relnote --- docs/release-notes/3672.doc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/release-notes/3672.doc.md diff --git a/docs/release-notes/3672.doc.md b/docs/release-notes/3672.doc.md new file mode 100644 index 0000000000..3d6d8e3201 --- /dev/null +++ b/docs/release-notes/3672.doc.md @@ -0,0 +1 @@ +Fix documentation location for {class}`scanpy.settings` {smaller}`P Angerer` From 3a7d95a4c5598330fd7dc4ddbb819aa80d566246 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 10 Jun 2025 11:03:35 +0200 Subject: [PATCH 21/42] comment on stack magic --- docs/extensions/autosummary_skip_inherited.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/extensions/autosummary_skip_inherited.py b/docs/extensions/autosummary_skip_inherited.py index 376f4c4e79..cfee4fe9e3 100644 --- a/docs/extensions/autosummary_skip_inherited.py +++ b/docs/extensions/autosummary_skip_inherited.py @@ -29,6 +29,8 @@ def skip_inherited( # noqa: PLR0917 # find parent class for frame, _ in walk_stack(None): + # Sadly `autodoc-skip-member` doesn’t give access to the parent object, so we need to do this. 
+ # Find this stack frame: https://github.com/sphinx-doc/sphinx/blob/a5366394ae527712c4edfeb07a5fbeecd4ca72e1/sphinx/ext/autosummary/generate.py#L496-L517 if frame.f_code.co_name == "_get_members" and frame.f_code.co_filename.endswith( "/generate.py" ): From 12f7807ddd18b071b4abaf03bf85a4a1982a628f Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 10 Jun 2025 11:09:15 +0200 Subject: [PATCH 22/42] simplify `documenting()` --- src/scanpy/_singleton.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/scanpy/_singleton.py b/src/scanpy/_singleton.py index 60fdd5f247..61feaafe45 100644 --- a/src/scanpy/_singleton.py +++ b/src/scanpy/_singleton.py @@ -6,14 +6,9 @@ def documenting() -> bool: - """Return whether this is being called from Sphinx. - - (but not when e.g. a log message is accessing `settings.verbosity`) - """ + """Return whether this is being called from Sphinx.""" if not os.environ.get("SPHINX_RUNNING"): return False - found = False - logging = False for frame in extract_stack(): # Let any sphinx ext get the docstring if frame.name in { @@ -23,10 +18,8 @@ def documenting() -> bool: "get_object_members", # Class level of autodoc "import_object", # Attr level of autodoc }: - found = True - if frame.filename.endswith("/logging.py"): - logging = True - return found and not logging + return True + return False class SingletonMeta(type): From c8a023c7e6db8425ae97c00758276df8916984c8 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Tue, 10 Jun 2025 11:19:12 +0200 Subject: [PATCH 23/42] simpler skipping --- src/scanpy/_singleton.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/scanpy/_singleton.py b/src/scanpy/_singleton.py index 61feaafe45..06590e2b7d 100644 --- a/src/scanpy/_singleton.py +++ b/src/scanpy/_singleton.py @@ -30,9 +30,8 @@ def __new__(mcls, cls_name: str, *args, **kwargs): if documenting(): props = {} for name in dir(cls): - if name.startswith("_") or name in cls.__dict__: + if (attr := getattr(mcls, name, None)) is None: continue - attr = getattr(mcls, name) if isinstance(attr, FunctionType | MethodType): # Circumvent https://github.com/tox-dev/sphinx-autodoc-typehints/pull/157 setattr(cls, name, getattr(cls, name)) From f9228c612b604e2a8d92a00a44a55ca6f1a6250a Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 10 Jun 2025 13:05:47 +0200 Subject: [PATCH 24/42] comment inherited impl Co-authored-by: Ilan Gold --- docs/extensions/autosummary_skip_inherited.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/extensions/autosummary_skip_inherited.py b/docs/extensions/autosummary_skip_inherited.py index cfee4fe9e3..eebacd195c 100644 --- a/docs/extensions/autosummary_skip_inherited.py +++ b/docs/extensions/autosummary_skip_inherited.py @@ -48,7 +48,7 @@ def skip_inherited( # noqa: PLR0917 return None typ = type(typ) - # skip since we know it’s not a member of the parent class + # Skip this `name` because the `parent` `type` lacks the `name`, which indicates `name` does not belong to `parent` return True From 36d28d235d36e9c418ba2b116fb9291075a1bbf5 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Tue, 10 Jun 2025 13:32:05 +0200 Subject: [PATCH 25/42] metaclass docs Co-authored-by: Ilan Gold --- docs/extensions/autosummary_skip_inherited.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/extensions/autosummary_skip_inherited.py b/docs/extensions/autosummary_skip_inherited.py index eebacd195c..82353862ce 100644 --- a/docs/extensions/autosummary_skip_inherited.py +++ b/docs/extensions/autosummary_skip_inherited.py @@ -46,6 +46,8 @@ def skip_inherited( # noqa: PLR0917 while typ is not type: if name in typ.__dict__: return None + # Metaclasses need the while loop to trace back to their parents. + # See `SingletonMeta` and the documentation of `settings` for an example. typ = type(typ) # Skip this `name` because the `parent` `type` lacks the `name`, which indicates `name` does not belong to `parent` From 22ed905d6452f3f6b5c6cb2576b9e9cd58459b5a Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 10 Jun 2025 14:14:35 +0200 Subject: [PATCH 26/42] merged version of notebooks --- notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks b/notebooks index a883f10ed4..37c26f37f1 160000 --- a/notebooks +++ b/notebooks @@ -1 +1 @@ -Subproject commit a883f10ed412bd45c27e665a3e26069d89b31418 +Subproject commit 37c26f37f199ac70449fff970bb6007c1c9959c8 From 8fa86c7b478267bdd5e15650c93a59acf5018867 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 12 Jun 2025 12:23:15 +0200 Subject: [PATCH 27/42] override preset --- src/scanpy/_settings.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 147ab94492..38d9c4f782 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -98,6 +98,24 @@ def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: Preset.SeuratV5: "seurat_v3", } + @contextmanager + def override(self, preset: Preset) -> Generator[Preset, None, None]: + """Temporarily override :attr:`scanpy.settings.preset`. 
+ + >>> import scanpy as sc + >>> sc.settings.preset = sc.Preset.ScanpyV1 + >>> with sc.settings.preset.override(sc.Preset.SeuratV5): + ... sc.settings.preset + <Preset.SeuratV5: 'seuratv5'> + >>> sc.settings.preset + <Preset.ScanpyV1: 'scanpyv1'> + """ + settings.preset = preset + try: + yield self + finally: + settings.preset = self + for _postprocess in _preset_postprocessors: _postprocess() @@ -164,8 +182,10 @@ def override( """ settings.verbosity = verbosity - yield self - settings.verbosity = self + try: + yield self + finally: + settings.verbosity = self def _type_check(var: object, name: str, types: type | UnionType) -> None: From 84b441f7193fdc7ecb9cef89f698fd8575a6b047 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 12 Jun 2025 13:28:24 +0200 Subject: [PATCH 28/42] WIP filter_{genes,cells} --- src/scanpy/_settings.py | 30 +++++++-- src/scanpy/_types.py | 26 +++++++- src/scanpy/preprocessing/_recipes.py | 49 +++++++------- src/scanpy/preprocessing/_simple.py | 95 +++++++++++++++++----------- 4 files changed, 130 insertions(+), 70 deletions(-) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 38d9c4f782..7c9a5732b9 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -9,11 +9,12 @@ from logging import getLevelNamesMapping from pathlib import Path from time import time -from typing import TYPE_CHECKING, Literal, LiteralString, ParamSpec, TypeVar, get_args +from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, get_args from .
import logging from ._compat import deprecated, old_positionals from ._singleton import SingletonMeta +from ._types import FilterCellsCutoffs, FilterGenesCutoffs from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: @@ -32,7 +33,6 @@ _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] _LoggingLevelName = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "HINT", "DEBUG"] -L = TypeVar("L", bound=LiteralString) S = TypeVar("S") T = TypeVar("T") P = ParamSpec("P") @@ -46,7 +46,7 @@ def _postprocess_preset_prop( - prop: cached_property[L], param: str, get_map: Callable[[], Mapping[Preset, L]] + prop: cached_property[T], param: str, get_map: Callable[[], Mapping[Preset, T]] ) -> None: map = get_map() @@ -63,10 +63,10 @@ def _postprocess_preset_prop( def _preset_property( param: str, -) -> Callable[[Callable[[], Mapping[Preset, L]]], cached_property[L]]: - def decorator(get_map: Callable[[], Mapping[Preset, L]]) -> cached_property[L]: +) -> Callable[[Callable[[], Mapping[Preset, T]]], cached_property[T]]: + def decorator(get_map: Callable[[], Mapping[Preset, T]]) -> cached_property[T]: @wraps(get_map) - def get(self: Preset) -> L: + def get(self: Preset) -> T: return get_map()[self] prop = cached_property(get) @@ -98,6 +98,24 @@ def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: Preset.SeuratV5: "seurat_v3", } + @_preset_property("{min,max}_{counts,genes}") + def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: + return { + Preset.ScanpyV1: FilterCellsCutoffs(None, None, None, None), + Preset.SeuratV5: FilterCellsCutoffs( + min_genes=200, min_counts=None, max_genes=None, max_counts=None + ), + } + + @_preset_property("{min,max}_{counts,cells}") + def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: + return { + Preset.ScanpyV1: FilterGenesCutoffs(None, None, None, None), + Preset.SeuratV5: FilterGenesCutoffs( + min_cells=3, min_counts=None, max_cells=None, max_counts=None + ), + } + @contextmanager def 
override(self, preset: Preset) -> Generator[Preset, None, None]: """Temporarily override :attr:`scanpy.settings.preset`. diff --git a/src/scanpy/_types.py b/src/scanpy/_types.py index af2d430fe4..3352f3af1d 100644 --- a/src/scanpy/_types.py +++ b/src/scanpy/_types.py @@ -1,7 +1,29 @@ from __future__ import annotations -from typing import Literal +from typing import Literal, NamedTuple -__all__ = ["HVGFlavor"] +__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGFlavor"] HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] + + +class FilterCellsCutoffs(NamedTuple): + min_genes: int | None + min_counts: int | None + max_genes: int | None + max_counts: int | None + + @property + def n(self) -> int: + return sum([i is not None for i in self]) + + +class FilterGenesCutoffs(NamedTuple): + min_cells: int | None + min_counts: int | None + max_cells: int | None + max_counts: int | None + + @property + def n(self) -> int: + return sum([i is not None for i in self]) diff --git a/src/scanpy/preprocessing/_recipes.py b/src/scanpy/preprocessing/_recipes.py index 420c6904a0..94946f51a3 100644 --- a/src/scanpy/preprocessing/_recipes.py +++ b/src/scanpy/preprocessing/_recipes.py @@ -5,13 +5,7 @@ from typing import TYPE_CHECKING from .. import logging as logg -from .. import preprocessing as pp from .._compat import CSBase, old_positionals -from ._deprecated.highly_variable_genes import ( - filter_genes_cv_deprecated, - filter_genes_dispersion, -) -from ._normalization import normalize_total if TYPE_CHECKING: from anndata import AnnData @@ -54,7 +48,9 @@ def recipe_weinreb17( Return a copy if true. """ + from .. import pp from ._deprecated import normalize_per_cell_weinreb16_deprecated, zscore_deprecated + from ._deprecated.highly_variable_genes import filter_genes_cv_deprecated if isinstance(adata.X, CSBase): msg = "`recipe_weinreb16 does not support sparse matrices." @@ -102,24 +98,24 @@ def recipe_seurat( Return a copy if true. """ + from .. 
import Preset, pl, pp, settings + from ._deprecated.highly_variable_genes import filter_genes_dispersion + if copy: adata = adata.copy() - pp.filter_cells(adata, min_genes=200) - pp.filter_genes(adata, min_cells=3) - normalize_total(adata, target_sum=1e4) - filter_result = filter_genes_dispersion( - adata.X, min_mean=0.0125, max_mean=3, min_disp=0.5, log=not log - ) - if plot: - from ..plotting import ( - _preprocessing as ppp, - ) - ppp.filter_genes_dispersion(filter_result, log=not log) - adata._inplace_subset_var(filter_result.gene_subset) # filter genes - if log: - pp.log1p(adata) - pp.scale(adata, max_value=10) + with settings.preset.override(Preset.SeuratV5): + pp.filter_cells(adata) + pp.filter_genes(adata) + pp.normalize_total(adata, target_sum=1e4) + filter_result = filter_genes_dispersion(adata.X, log=not log) + if plot: + pl.filter_genes_dispersion(filter_result, log=not log) + adata._inplace_subset_var(filter_result.gene_subset) # filter genes + if log: + pp.log1p(adata) + pp.scale(adata, max_value=10) + return adata if copy else None @@ -175,24 +171,25 @@ def recipe_zheng17( Returns or updates `adata` depending on `copy`. """ + from .. 
import pl, pp + from ._deprecated.highly_variable_genes import filter_genes_dispersion + start = logg.info("running recipe zheng17") if copy: adata = adata.copy() # only consider genes with more than 1 count pp.filter_genes(adata, min_counts=1) # normalize with total UMI count per cell - normalize_total(adata, key_added="n_counts_all") + pp.normalize_total(adata, key_added="n_counts_all") filter_result = filter_genes_dispersion( adata.X, flavor="cell_ranger", n_top_genes=n_top_genes, log=False ) if plot: # should not import at the top of the file - from ..plotting import _preprocessing as ppp - - ppp.filter_genes_dispersion(filter_result, log=True) + pl.filter_genes_dispersion(filter_result, log=True) # actually filter the genes, the following is the inplace version of # adata = adata[:, filter_result.gene_subset] adata._inplace_subset_var(filter_result.gene_subset) # filter genes - normalize_total(adata) # renormalize after filtering + pp.normalize_total(adata) # renormalize after filtering if log: pp.log1p(adata) # log transform: X = log(X + 1) pp.scale(adata) diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index a8fb274203..6c2442269f 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -21,6 +21,7 @@ from .. import logging as logg from .._compat import CSBase, CSRBase, DaskArray, deprecated, njit, old_positionals from .._settings import settings as sett +from .._types import FilterCellsCutoffs, FilterGenesCutoffs from .._utils import ( _check_array_function_arguments, _resolve_axis, @@ -71,7 +72,8 @@ def filter_cells( `min_genes` genes expressed. This is to filter measurement outliers, i.e. “unreliable” observations. - Only provide one of the optional parameters `min_counts`, `min_genes`, + Unless you use a :attr:`scanpy.settings.preset`, + only provide one of the optional parameters `min_counts`, `min_genes`, `max_counts`, `max_genes` per call. 
Parameters @@ -138,10 +140,18 @@ def filter_cells( """ if copy: logg.warning("`copy` is deprecated, use `inplace` instead.") - n_given_options = sum( - option is not None for option in [min_genes, min_counts, max_genes, max_counts] + cutoffs = FilterCellsCutoffs( + min_counts=min_counts, + min_genes=min_genes, + max_counts=max_counts, + max_genes=max_genes, ) - if n_given_options != 1: + del min_counts, min_genes, max_counts, max_genes + if cutoffs.n == 0: + from .. import settings + + cutoffs = settings.preset.filter_cells + if cutoffs.n != 1: msg = ( "Only provide one of the optional parameters `min_counts`, " "`min_genes`, `max_counts`, `max_genes` per call." @@ -153,25 +163,25 @@ def filter_cells( cell_subset, number = materialize_as_ndarray( filter_cells( adata.X, - min_counts=min_counts, - min_genes=min_genes, - max_counts=max_counts, - max_genes=max_genes, + min_counts=cutoffs.min_counts, + min_genes=cutoffs.min_genes, + max_counts=cutoffs.max_counts, + max_genes=cutoffs.max_genes, ), ) if not inplace: return cell_subset, number - if min_genes is None and max_genes is None: + if cutoffs.min_genes is None and cutoffs.max_genes is None: adata.obs["n_counts"] = number else: adata.obs["n_genes"] = number adata._inplace_subset_obs(cell_subset) return adata if copy else None X = data # proceed with processing the data matrix - min_number = min_counts if min_genes is None else min_genes - max_number = max_counts if max_genes is None else max_genes + min_number = cutoffs.min_counts if cutoffs.min_genes is None else cutoffs.min_genes + max_number = cutoffs.max_counts if cutoffs.max_genes is None else cutoffs.max_genes number_per_cell = stats.sum( - X if min_genes is None and max_genes is None else X > 0, axis=1 + X if cutoffs.min_genes is None and cutoffs.max_genes is None else X > 0, axis=1 ) if min_number is not None: cell_subset = number_per_cell >= min_number @@ -181,19 +191,19 @@ def filter_cells( s = stats.sum(~cell_subset) if s > 0: msg = f"filtered out {s} 
cells that have " - if min_genes is not None or min_counts is not None: + if cutoffs.min_genes is not None or cutoffs.min_counts is not None: msg += "less than " msg += ( - f"{min_genes} genes expressed" - if min_counts is None - else f"{min_counts} counts" + f"{cutoffs.min_genes} genes expressed" + if cutoffs.min_counts is None + else f"{cutoffs.min_counts} counts" ) - if max_genes is not None or max_counts is not None: + if cutoffs.max_genes is not None or cutoffs.max_counts is not None: msg += "more than " msg += ( - f"{max_genes} genes expressed" - if max_counts is None - else f"{max_counts} counts" + f"{cutoffs.max_genes} genes expressed" + if cutoffs.max_counts is None + else f"{cutoffs.max_counts} counts" ) logg.info(msg) return cell_subset, number_per_cell @@ -218,7 +228,8 @@ def filter_genes( least `min_cells` cells or have at most `max_counts` counts or are expressed in at most `max_cells` cells. - Only provide one of the optional parameters `min_counts`, `min_cells`, + Unless you use a :attr:`scanpy.settings.preset`, + only provide one of the optional parameters `min_counts`, `min_cells`, `max_counts`, `max_cells` per call. Parameters @@ -252,10 +263,18 @@ def filter_genes( """ if copy: logg.warning("`copy` is deprecated, use `inplace` instead.") - n_given_options = sum( - option is not None for option in [min_cells, min_counts, max_cells, max_counts] + cutoffs = FilterGenesCutoffs( + min_counts=min_counts, + min_cells=min_cells, + max_counts=max_counts, + max_cells=max_cells, ) - if n_given_options != 1: + del min_counts, min_cells, max_counts, max_cells + if cutoffs.n == 0: + from .. import settings + + cutoffs = settings.preset.filter_genes + if cutoffs.n != 1: msg = ( "Only provide one of the optional parameters `min_counts`, " "`min_cells`, `max_counts`, `max_cells` per call." 
@@ -268,15 +287,15 @@ def filter_genes( gene_subset, number = materialize_as_ndarray( filter_genes( adata.X, - min_cells=min_cells, - min_counts=min_counts, - max_cells=max_cells, - max_counts=max_counts, + min_cells=cutoffs.min_cells, + min_counts=cutoffs.min_counts, + max_cells=cutoffs.max_cells, + max_counts=cutoffs.max_counts, ) ) if not inplace: return gene_subset, number - if min_cells is None and max_cells is None: + if cutoffs.min_cells is None and cutoffs.max_cells is None: adata.var["n_counts"] = number else: adata.var["n_cells"] = number @@ -284,10 +303,10 @@ def filter_genes( return adata if copy else None X = data # proceed with processing the data matrix - min_number = min_counts if min_cells is None else min_cells - max_number = max_counts if max_cells is None else max_cells + min_number = cutoffs.min_counts if cutoffs.min_cells is None else cutoffs.min_cells + max_number = cutoffs.max_counts if cutoffs.max_cells is None else cutoffs.max_cells number_per_gene = stats.sum( - X if min_cells is None and max_cells is None else X > 0, axis=0 + X if cutoffs.min_cells is None and cutoffs.max_cells is None else X > 0, axis=0 ) if min_number is not None: gene_subset = number_per_gene >= min_number @@ -297,15 +316,19 @@ def filter_genes( s = stats.sum(~gene_subset) if s > 0: msg = f"filtered out {s} genes that are detected " - if min_cells is not None or min_counts is not None: + if cutoffs.min_cells is not None or cutoffs.min_counts is not None: msg += "in less than " msg += ( - f"{min_cells} cells" if min_counts is None else f"{min_counts} counts" + f"{cutoffs.min_cells} cells" + if cutoffs.min_counts is None + else f"{cutoffs.min_counts} counts" ) - if max_cells is not None or max_counts is not None: + if cutoffs.max_cells is not None or cutoffs.max_counts is not None: msg += "in more than " msg += ( - f"{max_cells} cells" if max_counts is None else f"{max_counts} counts" + f"{cutoffs.max_cells} cells" + if cutoffs.max_counts is None + else 
f"{cutoffs.max_counts} counts" ) logg.info(msg) return gene_subset, number_per_gene From 44b14e9af354bee732eb6740735ec233004fdd64 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 12 Jun 2025 13:42:09 +0200 Subject: [PATCH 29/42] fix docs --- docs/conf.py | 2 ++ src/scanpy/_settings.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 62b95bdee2..3e94828a0a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -219,6 +219,8 @@ def setup(app: Sphinx): "scanpy.plotting._matrixplot.MatrixPlot": "scanpy.pl.MatrixPlot", "scanpy.plotting._dotplot.DotPlot": "scanpy.pl.DotPlot", "scanpy.plotting._stacked_violin.StackedViolin": "scanpy.pl.StackedViolin", + "scanpy._types.FilterCellsCutoffs": "tuple", + "scanpy._types.FilterGenesCutoffs": "tuple", "pandas.core.series.Series": "pandas.Series", "numpy.bool_": "numpy.bool", # Since numpy 2, numpy.bool is the canonical dtype } diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 7c9a5732b9..a58dc5f204 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -100,6 +100,7 @@ def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: @_preset_property("{min,max}_{counts,genes}") def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: + """Cutoffs for :func:`~scanpy.pp.filter_cells`.""" return { Preset.ScanpyV1: FilterCellsCutoffs(None, None, None, None), Preset.SeuratV5: FilterCellsCutoffs( @@ -109,6 +110,7 @@ def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: @_preset_property("{min,max}_{counts,cells}") def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: + """Cutoffs for :func:`~scanpy.pp.filter_genes`.""" return { Preset.ScanpyV1: FilterGenesCutoffs(None, None, None, None), Preset.SeuratV5: FilterGenesCutoffs( From 2f4c8a4e610277fe68e5f57d16646daa8b0916d1 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Thu, 12 Jun 2025 16:48:28 +0200 Subject: [PATCH 30/42] param sets --- docs/conf.py | 5 +- src/scanpy/_param_sets.py | 35 ++++++++++++ src/scanpy/_settings.py | 57 +++++++++++-------- src/scanpy/_types.py | 29 ---------- .../preprocessing/_highly_variable_genes.py | 2 +- src/scanpy/preprocessing/_simple.py | 2 +- 6 files changed, 72 insertions(+), 58 deletions(-) create mode 100644 src/scanpy/_param_sets.py delete mode 100644 src/scanpy/_types.py diff --git a/docs/conf.py b/docs/conf.py index 3e94828a0a..009f0d4853 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -219,8 +219,9 @@ def setup(app: Sphinx): "scanpy.plotting._matrixplot.MatrixPlot": "scanpy.pl.MatrixPlot", "scanpy.plotting._dotplot.DotPlot": "scanpy.pl.DotPlot", "scanpy.plotting._stacked_violin.StackedViolin": "scanpy.pl.StackedViolin", - "scanpy._types.FilterCellsCutoffs": "tuple", - "scanpy._types.FilterGenesCutoffs": "tuple", + "scanpy._param_sets.HVGFlavor": "tuple", + "scanpy._param_sets.FilterCellsCutoffs": "tuple", + "scanpy._param_sets.FilterGenesCutoffs": "tuple", "pandas.core.series.Series": "pandas.Series", "numpy.bool_": "numpy.bool", # Since numpy 2, numpy.bool is the canonical dtype } diff --git a/src/scanpy/_param_sets.py b/src/scanpy/_param_sets.py new file mode 100644 index 0000000000..f8e94dc7b3 --- /dev/null +++ b/src/scanpy/_param_sets.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, NamedTuple + +if TYPE_CHECKING: + from typing import Literal + + +__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGFlavor"] + + +class HVGFlavor(NamedTuple): + flavor: Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] + + +class FilterCellsCutoffs(NamedTuple): + min_genes: int | None = None + min_counts: int | None = None + max_genes: int | None = None + max_counts: int | None = None + + @property + def n(self) -> int: + return sum([i is not None for i in self]) + + +class FilterGenesCutoffs(NamedTuple): + min_cells: int | None = 
None + min_counts: int | None = None + max_cells: int | None = None + max_counts: int | None = None + + @property + def n(self) -> int: + return sum([i is not None for i in self]) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index a58dc5f204..2690cf1f5c 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -9,12 +9,12 @@ from logging import getLevelNamesMapping from pathlib import Path from time import time -from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, get_args +from typing import TYPE_CHECKING, Literal, NamedTuple, ParamSpec, TypeVar, get_args from . import logging from ._compat import deprecated, old_positionals +from ._param_sets import FilterCellsCutoffs, FilterGenesCutoffs, HVGFlavor from ._singleton import SingletonMeta -from ._types import FilterCellsCutoffs, FilterGenesCutoffs from .logging import _RootLogger, _set_log_file, _set_log_level if TYPE_CHECKING: @@ -22,8 +22,6 @@ from types import UnionType from typing import ClassVar, Concatenate, Self, TextIO - from ._types import HVGFlavor - # Collected from the print_* functions in matplotlib.backends _Format = ( Literal["png", "jpg", "tif", "tiff"] # noqa: PYI030 @@ -33,6 +31,7 @@ _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] _LoggingLevelName = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "HINT", "DEBUG"] +NT = TypeVar("NT", bound=NamedTuple) S = TypeVar("S") T = TypeVar("T") P = ParamSpec("P") @@ -45,8 +44,16 @@ _preset_postprocessors: list[Callable[[], None]] = [] +def _non_defaults(nt: NamedTuple) -> Generator[tuple[str, object], None, None]: + cls = type(nt) + for param in cls._fields: + value = getattr(nt, param) + if param not in cls._field_defaults or value != cls._field_defaults[param]: + yield param, value + + def _postprocess_preset_prop( - prop: cached_property[T], param: str, get_map: Callable[[], Mapping[Preset, T]] + prop: cached_property[NT], get_map: Callable[[], Mapping[Preset, NT]] ) -> None: map = get_map() @@ 
-54,28 +61,28 @@ def _postprocess_preset_prop( value_type = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type)[1] added_doc = "\n".join( - f":attr:`{k.name}`\n Default: `{param}={v!r}`" for k, v in map.items() + ":attr:`{name}`\n Defaults: {defaults}".format( + name=k.name, + defaults=", ".join( + f"`{param}={default!r}`" for param, default in _non_defaults(params) + ) + or "none", + ) + for k, params in map.items() ) prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" prop.func.__annotations__["return"] = value_type -def _preset_property( - param: str, -) -> Callable[[Callable[[], Mapping[Preset, T]]], cached_property[T]]: - def decorator(get_map: Callable[[], Mapping[Preset, T]]) -> cached_property[T]: - @wraps(get_map) - def get(self: Preset) -> T: - return get_map()[self] - - prop = cached_property(get) - _preset_postprocessors.append( - partial(_postprocess_preset_prop, prop, param, get_map) - ) - return prop +def _preset_property(get_map: Callable[[], Mapping[Preset, NT]]) -> cached_property[NT]: + @wraps(get_map) + def get(self: Preset) -> NT: + return get_map()[self] - return decorator + prop = cached_property(get) + _preset_postprocessors.append(partial(_postprocess_preset_prop, prop, get_map)) + return prop class Preset(StrEnum): @@ -90,15 +97,15 @@ class Preset(StrEnum): SeuratV5 = auto() """Try to match Seurat 5.* as closely as possible.""" - @_preset_property("flavor") + @_preset_property def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" return { - Preset.ScanpyV1: "seurat", - Preset.SeuratV5: "seurat_v3", + Preset.ScanpyV1: HVGFlavor(flavor="seurat"), + Preset.SeuratV5: HVGFlavor(flavor="seurat_v3"), } - @_preset_property("{min,max}_{counts,genes}") + @_preset_property def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: """Cutoffs for :func:`~scanpy.pp.filter_cells`.""" return { @@ -108,7 +115,7 @@ def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: ), } - 
@_preset_property("{min,max}_{counts,cells}") + @_preset_property def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: """Cutoffs for :func:`~scanpy.pp.filter_genes`.""" return { diff --git a/src/scanpy/_types.py b/src/scanpy/_types.py deleted file mode 100644 index 3352f3af1d..0000000000 --- a/src/scanpy/_types.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import annotations - -from typing import Literal, NamedTuple - -__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGFlavor"] - -HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] - - -class FilterCellsCutoffs(NamedTuple): - min_genes: int | None - min_counts: int | None - max_genes: int | None - max_counts: int | None - - @property - def n(self) -> int: - return sum([i is not None for i in self]) - - -class FilterGenesCutoffs(NamedTuple): - min_cells: int | None - min_counts: int | None - max_cells: int | None - max_counts: int | None - - @property - def n(self) -> int: - return sum([i is not None for i in self]) diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 1e5444cbba..91519224b5 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -24,7 +24,7 @@ from numpy.typing import NDArray - from .._types import HVGFlavor + from .._param_sets import HVGFlavor def _highly_variable_genes_seurat_v3( # noqa: PLR0912, PLR0915 diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 6c2442269f..4520f8beb2 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -20,8 +20,8 @@ from .. 
import logging as logg from .._compat import CSBase, CSRBase, DaskArray, deprecated, njit, old_positionals +from .._param_sets import FilterCellsCutoffs, FilterGenesCutoffs from .._settings import settings as sett -from .._types import FilterCellsCutoffs, FilterGenesCutoffs from .._utils import ( _check_array_function_arguments, _resolve_axis, From d0daf38576f435ccd35047a386702db237b15a2e Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 13 Jun 2025 12:16:39 +0200 Subject: [PATCH 31/42] extract --- src/scanpy/_param_sets.py | 35 --- .../{_settings.py => _settings/__init__.py} | 234 ++---------------- src/scanpy/_settings/presets.py | 156 ++++++++++++ src/scanpy/_settings/verbosity.py | 84 +++++++ src/scanpy/logging.py | 6 +- .../preprocessing/_highly_variable_genes.py | 2 +- src/scanpy/preprocessing/_simple.py | 6 +- 7 files changed, 272 insertions(+), 251 deletions(-) delete mode 100644 src/scanpy/_param_sets.py rename src/scanpy/{_settings.py => _settings/__init__.py} (68%) create mode 100644 src/scanpy/_settings/presets.py create mode 100644 src/scanpy/_settings/verbosity.py diff --git a/src/scanpy/_param_sets.py b/src/scanpy/_param_sets.py deleted file mode 100644 index f8e94dc7b3..0000000000 --- a/src/scanpy/_param_sets.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, NamedTuple - -if TYPE_CHECKING: - from typing import Literal - - -__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGFlavor"] - - -class HVGFlavor(NamedTuple): - flavor: Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] - - -class FilterCellsCutoffs(NamedTuple): - min_genes: int | None = None - min_counts: int | None = None - max_genes: int | None = None - max_counts: int | None = None - - @property - def n(self) -> int: - return sum([i is not None for i in self]) - - -class FilterGenesCutoffs(NamedTuple): - min_cells: int | None = None - min_counts: int | None = None - max_cells: int | None = None - max_counts: 
int | None = None - - @property - def n(self) -> int: - return sum([i is not None for i in self]) diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings/__init__.py similarity index 68% rename from src/scanpy/_settings.py rename to src/scanpy/_settings/__init__.py index 2690cf1f5c..042b4d5381 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings/__init__.py @@ -1,37 +1,33 @@ from __future__ import annotations import inspect -import re import sys -from contextlib import contextmanager -from enum import EnumMeta, IntEnum, StrEnum, auto -from functools import cached_property, partial, wraps -from logging import getLevelNamesMapping +from functools import wraps from pathlib import Path from time import time -from typing import TYPE_CHECKING, Literal, NamedTuple, ParamSpec, TypeVar, get_args +from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, get_args -from . import logging -from ._compat import deprecated, old_positionals -from ._param_sets import FilterCellsCutoffs, FilterGenesCutoffs, HVGFlavor -from ._singleton import SingletonMeta -from .logging import _RootLogger, _set_log_file, _set_log_level +from .. 
import logging +from .._compat import deprecated, old_positionals +from .._singleton import SingletonMeta +from ..logging import _RootLogger, _set_log_file, _set_log_level +from .presets import Preset +from .verbosity import Verbosity if TYPE_CHECKING: - from collections.abc import Callable, Generator, Iterable, Mapping + from collections.abc import Callable, Iterable from types import UnionType from typing import ClassVar, Concatenate, Self, TextIO + from .verbosity import _VerbosityName + # Collected from the print_* functions in matplotlib.backends _Format = ( Literal["png", "jpg", "tif", "tiff"] # noqa: PYI030 | Literal["pdf", "ps", "eps", "svg", "svgz", "pgf"] | Literal["raw", "rgba"] ) - _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] - _LoggingLevelName = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "HINT", "DEBUG"] -NT = TypeVar("NT", bound=NamedTuple) S = TypeVar("S") T = TypeVar("T") P = ParamSpec("P") @@ -41,180 +37,6 @@ AnnDataFileFormat = Literal["h5ad", "zarr"] -_preset_postprocessors: list[Callable[[], None]] = [] - - -def _non_defaults(nt: NamedTuple) -> Generator[tuple[str, object], None, None]: - cls = type(nt) - for param in cls._fields: - value = getattr(nt, param) - if param not in cls._field_defaults or value != cls._field_defaults[param]: - yield param, value - - -def _postprocess_preset_prop( - prop: cached_property[NT], get_map: Callable[[], Mapping[Preset, NT]] -) -> None: - map = get_map() - - map_type = inspect.signature(get_map).return_annotation - value_type = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type)[1] - - added_doc = "\n".join( - ":attr:`{name}`\n Defaults: {defaults}".format( - name=k.name, - defaults=", ".join( - f"`{param}={default!r}`" for param, default in _non_defaults(params) - ) - or "none", - ) - for k, params in map.items() - ) - - prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" - prop.func.__annotations__["return"] = value_type - - -def _preset_property(get_map: Callable[[], 
Mapping[Preset, NT]]) -> cached_property[NT]: - @wraps(get_map) - def get(self: Preset) -> NT: - return get_map()[self] - - prop = cached_property(get) - _preset_postprocessors.append(partial(_postprocess_preset_prop, prop, get_map)) - return prop - - -class Preset(StrEnum): - """Presets for :attr:`scanpy.settings.preset`. - - See properties below for details. - """ - - ScanpyV1 = auto() - """Scanpy 1.*’s default settings.""" - - SeuratV5 = auto() - """Try to match Seurat 5.* as closely as possible.""" - - @_preset_property - def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: - """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" - return { - Preset.ScanpyV1: HVGFlavor(flavor="seurat"), - Preset.SeuratV5: HVGFlavor(flavor="seurat_v3"), - } - - @_preset_property - def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: - """Cutoffs for :func:`~scanpy.pp.filter_cells`.""" - return { - Preset.ScanpyV1: FilterCellsCutoffs(None, None, None, None), - Preset.SeuratV5: FilterCellsCutoffs( - min_genes=200, min_counts=None, max_genes=None, max_counts=None - ), - } - - @_preset_property - def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: - """Cutoffs for :func:`~scanpy.pp.filter_genes`.""" - return { - Preset.ScanpyV1: FilterGenesCutoffs(None, None, None, None), - Preset.SeuratV5: FilterGenesCutoffs( - min_cells=3, min_counts=None, max_cells=None, max_counts=None - ), - } - - @contextmanager - def override(self, preset: Preset) -> Generator[Preset, None, None]: - """Temporarily override :attr:`scanpy.settings.preset`. - - >>> import scanpy as sc - >>> sc.settings.preset = sc.Preset.ScanpyV1 - >>> with sc.settings.preset.override(sc.Preset.SeuratV5): - ... 
sc.settings.preset - - >>> sc.settings.preset - - """ - settings.preset = preset - try: - yield self - finally: - settings.preset = self - - -for _postprocess in _preset_postprocessors: - _postprocess() - - -_VERBOSITY_TO_LOGLEVEL: dict[int | _VerbosityName, _LoggingLevelName] = { - "error": "ERROR", - "warning": "WARNING", - "info": "INFO", - "hint": "HINT", - "debug": "DEBUG", -} -_VERBOSITY_TO_LOGLEVEL.update(dict(enumerate(list(_VERBOSITY_TO_LOGLEVEL.values())))) - - -class VerbosityMeta(EnumMeta): - @property - @deprecated("Use `Verbosity.warning` instead") - def warn(cls) -> Verbosity: - return Verbosity.warning - - -class Verbosity(IntEnum, metaclass=VerbosityMeta): - """Logging verbosity levels for :attr:`scanpy.settings.verbosity`.""" - - error = 0 - """Error (`0`)""" - warning = 1 - """Warning (`1`)""" - info = 2 - """Info (`2`)""" - hint = 3 - """Hint (`3`)""" - debug = 4 - """Debug (`4`)""" - - def __eq__(self, other: object) -> bool: - if isinstance(other, Verbosity): - return self is other - if isinstance(other, int): - return self.value == other - if isinstance(other, str): - return self.name == other - return NotImplemented - - @property - def level(self) -> int: - """The :ref:`logging level ` corresponding to this verbosity level.""" - m = getLevelNamesMapping() - return m[_VERBOSITY_TO_LOGLEVEL[self.name]] - - @contextmanager - def override( - self, verbosity: Verbosity | _VerbosityName | int - ) -> Generator[Verbosity, None, None]: - """Temporarily override verbosity. - - >>> import scanpy as sc - >>> sc.settings.verbosity = sc.Verbosity.info - >>> with sc.settings.verbosity.override(settings.verbosity.debug): - ... 
sc.settings.verbosity - - >>> sc.settings.verbosity - - """ - settings.verbosity = verbosity - try: - yield self - finally: - settings.verbosity = self - - def _type_check(var: object, name: str, types: type | UnionType) -> None: if isinstance(var, types): return @@ -294,25 +116,19 @@ def verbosity(cls) -> Verbosity: @verbosity.setter def verbosity(cls, verbosity: Verbosity | _VerbosityName | int) -> None: - verbosity_str_options: list[_VerbosityName] = [ - v for v in _VERBOSITY_TO_LOGLEVEL if isinstance(v, str) - ] - if isinstance(verbosity, Verbosity): - cls._verbosity = verbosity - elif isinstance(verbosity, int): - cls._verbosity = Verbosity(verbosity) - elif isinstance(verbosity, str): - verbosity = verbosity.lower() - if verbosity not in verbosity_str_options: - msg = ( - f"Cannot set verbosity to {verbosity}. " - f"Accepted string values are: {verbosity_str_options}" - ) - raise ValueError(msg) - cls._verbosity = Verbosity(verbosity_str_options.index(verbosity)) - else: - _type_check(verbosity, "verbosity", str | int) - _set_log_level(cls, _VERBOSITY_TO_LOGLEVEL[cls._verbosity.name]) + try: + cls._verbosity = ( + Verbosity[verbosity.lower()] + if isinstance(verbosity, str) + else Verbosity(verbosity) + ) + except KeyError: + msg = ( + f"Cannot set verbosity to {verbosity}. 
" + f"Accepted string values are: {Verbosity.__members__.keys()}" + ) + raise ValueError(msg) from None + _set_log_level(cls, cls._verbosity.level) @property def N_PCS(cls) -> int: @@ -621,7 +437,7 @@ def _set_figure_params( # noqa: PLR0913 rcParams["figure.facecolor"] = facecolor rcParams["axes.facecolor"] = facecolor if scanpy: - from .plotting._rcmod import set_rcParams_scanpy + from ..plotting._rcmod import set_rcParams_scanpy set_rcParams_scanpy(fontsize=fontsize, color_map=color_map) if figsize is not None: diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py new file mode 100644 index 0000000000..2e9ef6c489 --- /dev/null +++ b/src/scanpy/_settings/presets.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import inspect +import re +from contextlib import contextmanager +from enum import StrEnum, auto +from functools import cached_property, partial, wraps +from typing import TYPE_CHECKING, Literal, NamedTuple, TypeVar + +if TYPE_CHECKING: + from collections.abc import Callable, Generator, Mapping + from typing import Literal + +NT = TypeVar("NT", bound=NamedTuple) + +__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGFlavor", "Preset"] + + +class HVGFlavor(NamedTuple): + flavor: Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] + + +class FilterCellsCutoffs(NamedTuple): + min_genes: int | None = None + min_counts: int | None = None + max_genes: int | None = None + max_counts: int | None = None + + @property + def n(self) -> int: + return sum([i is not None for i in self]) + + +class FilterGenesCutoffs(NamedTuple): + min_cells: int | None = None + min_counts: int | None = None + max_cells: int | None = None + max_counts: int | None = None + + @property + def n(self) -> int: + return sum([i is not None for i in self]) + + +preset_postprocessors: list[Callable[[], None]] = [] + + +def named_tuple_non_defaults( + nt: NamedTuple, +) -> Generator[tuple[str, object], None, None]: + cls = type(nt) + for param 
in cls._fields: + value = getattr(nt, param) + if param not in cls._field_defaults or value != cls._field_defaults[param]: + yield param, value + + +def postprocess_preset_prop( + prop: cached_property[NT], get_map: Callable[[], Mapping[Preset, NT]] +) -> None: + map = get_map() + + map_type = inspect.signature(get_map).return_annotation + m = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type) + assert m is not None + value_type = m[1] + + added_doc = "\n".join( + ":attr:`{name}`\n Defaults: {defaults}".format( + name=k.name, + defaults=", ".join( + f"`{param}={default!r}`" + for param, default in named_tuple_non_defaults(params) + ) + or "none", + ) + for k, params in map.items() + ) + + prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" + prop.func.__annotations__["return"] = value_type + + +def preset_property(get_map: Callable[[], Mapping[Preset, NT]]) -> cached_property[NT]: + @wraps(get_map) + def get(self: Preset) -> NT: + return get_map()[self] + + prop = cached_property(get) + preset_postprocessors.append(partial(postprocess_preset_prop, prop, get_map)) + return prop + + +class Preset(StrEnum): + """Presets for :attr:`scanpy.settings.preset`. + + See properties below for details. 
+ """ + + ScanpyV1 = auto() + """Scanpy 1.*’s default settings.""" + + SeuratV5 = auto() + """Try to match Seurat 5.* as closely as possible.""" + + @preset_property + def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: + """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" + return { + Preset.ScanpyV1: HVGFlavor(flavor="seurat"), + Preset.SeuratV5: HVGFlavor(flavor="seurat_v3"), + } + + @preset_property + def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: + """Cutoffs for :func:`~scanpy.pp.filter_cells`.""" + return { + Preset.ScanpyV1: FilterCellsCutoffs(None, None, None, None), + Preset.SeuratV5: FilterCellsCutoffs( + min_genes=200, min_counts=None, max_genes=None, max_counts=None + ), + } + + @preset_property + def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: + """Cutoffs for :func:`~scanpy.pp.filter_genes`.""" + return { + Preset.ScanpyV1: FilterGenesCutoffs(None, None, None, None), + Preset.SeuratV5: FilterGenesCutoffs( + min_cells=3, min_counts=None, max_cells=None, max_counts=None + ), + } + + @contextmanager + def override(self, preset: Preset) -> Generator[Preset, None, None]: + """Temporarily override :attr:`scanpy.settings.preset`. + + >>> import scanpy as sc + >>> sc.settings.preset = sc.Preset.ScanpyV1 + >>> with sc.settings.preset.override(sc.Preset.SeuratV5): + ... 
sc.settings.preset + + >>> sc.settings.preset + + """ + from scanpy import settings + + settings.preset = preset + try: + yield self + finally: + settings.preset = self + + +for postprocess in preset_postprocessors: + postprocess() diff --git a/src/scanpy/_settings/verbosity.py b/src/scanpy/_settings/verbosity.py new file mode 100644 index 0000000000..8704f1a160 --- /dev/null +++ b/src/scanpy/_settings/verbosity.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from contextlib import contextmanager +from enum import EnumMeta, IntEnum +from logging import getLevelNamesMapping +from typing import TYPE_CHECKING + +from .._compat import deprecated + +if TYPE_CHECKING: + from collections.abc import Generator + from typing import Literal + + _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] + _LoggingLevelName = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "HINT", "DEBUG"] + + +_VERBOSITY_TO_LOGLEVEL: dict[int | _VerbosityName, _LoggingLevelName] = { + "error": "ERROR", + "warning": "WARNING", + "info": "INFO", + "hint": "HINT", + "debug": "DEBUG", +} +_VERBOSITY_TO_LOGLEVEL.update(dict(enumerate(list(_VERBOSITY_TO_LOGLEVEL.values())))) + + +class VerbosityMeta(EnumMeta): + @property + @deprecated("Use `Verbosity.warning` instead") + def warn(cls) -> Verbosity: + return Verbosity.warning + + +class Verbosity(IntEnum, metaclass=VerbosityMeta): + """Logging verbosity levels for :attr:`scanpy.settings.verbosity`.""" + + error = 0 + """Error (`0`)""" + warning = 1 + """Warning (`1`)""" + info = 2 + """Info (`2`)""" + hint = 3 + """Hint (`3`)""" + debug = 4 + """Debug (`4`)""" + + def __eq__(self, other: object) -> bool: + if isinstance(other, Verbosity): + return self is other + if isinstance(other, int): + return self.value == other + if isinstance(other, str): + return self.name == other + return NotImplemented + + @property + def level(self) -> int: + """The :ref:`logging level ` corresponding to this verbosity level.""" + m = 
getLevelNamesMapping() + return m[_VERBOSITY_TO_LOGLEVEL[self.name]] + + @contextmanager + def override( + self, verbosity: Verbosity | _VerbosityName | int + ) -> Generator[Verbosity, None, None]: + """Temporarily override verbosity. + + >>> import scanpy as sc + >>> sc.settings.verbosity = sc.Verbosity.info + >>> with sc.settings.verbosity.override(settings.verbosity.debug): + ... sc.settings.verbosity + + >>> sc.settings.verbosity + + """ + from scanpy import settings + + settings.verbosity = verbosity + try: + yield self + finally: + settings.verbosity = self diff --git a/src/scanpy/logging.py b/src/scanpy/logging.py index 232062a3fe..ed4cb9478e 100644 --- a/src/scanpy/logging.py +++ b/src/scanpy/logging.py @@ -18,7 +18,7 @@ from session_info2 import SessionInfo - from ._settings import settings + from ._settings import SettingsMeta # This is currently the only documented API @@ -74,7 +74,7 @@ def debug(self, msg, *, time=None, deep=None, extra=None) -> datetime: return self.log(DEBUG, msg, time=time, deep=deep, extra=extra) -def _set_log_file(settings: settings) -> None: +def _set_log_file(settings: SettingsMeta) -> None: file = settings.logfile name = settings.logpath root = settings._root_logger @@ -86,7 +86,7 @@ def _set_log_file(settings: settings) -> None: root.addHandler(h) -def _set_log_level(settings: settings, level: int) -> None: +def _set_log_level(settings: SettingsMeta, level: int) -> None: root = settings._root_logger root.setLevel(level) for h in list(root.handlers): diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 91519224b5..f5eab8ba50 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -24,7 +24,7 @@ from numpy.typing import NDArray - from .._param_sets import HVGFlavor + from .._settings.presets import HVGFlavor def _highly_variable_genes_seurat_v3( # noqa: PLR0912, PLR0915 diff --git 
a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 4520f8beb2..0d8a9595a4 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -20,8 +20,8 @@ from .. import logging as logg from .._compat import CSBase, CSRBase, DaskArray, deprecated, njit, old_positionals -from .._param_sets import FilterCellsCutoffs, FilterGenesCutoffs -from .._settings import settings as sett +from .._settings import settings +from .._settings.presets import FilterCellsCutoffs, FilterGenesCutoffs from .._utils import ( _check_array_function_arguments, _resolve_axis, @@ -747,7 +747,7 @@ def regress_out( if isinstance(X, CSBase): logg.info(" sparse input is densified and may lead to high memory use") - n_jobs = sett.n_jobs if n_jobs is None else n_jobs + n_jobs = settings.n_jobs if n_jobs is None else n_jobs # regress on a single categorical variable variable_is_categorical = False From 43408387f0c387d757deb0e7ececbe3956be5218 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 13 Jun 2025 13:10:48 +0200 Subject: [PATCH 32/42] fix test --- src/scanpy/_settings/verbosity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/_settings/verbosity.py b/src/scanpy/_settings/verbosity.py index 8704f1a160..3261fdcfe9 100644 --- a/src/scanpy/_settings/verbosity.py +++ b/src/scanpy/_settings/verbosity.py @@ -69,7 +69,7 @@ def override( >>> import scanpy as sc >>> sc.settings.verbosity = sc.Verbosity.info - >>> with sc.settings.verbosity.override(settings.verbosity.debug): + >>> with sc.settings.verbosity.override(sc.settings.verbosity.debug): ... sc.settings.verbosity >>> sc.settings.verbosity From bd348462cc9675fab851bb352359cc7b81587941 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 13 Jun 2025 13:26:12 +0200 Subject: [PATCH 33/42] relnote and scanpy v2 preset --- docs/release-notes/3653.feature.md | 1 + src/scanpy/_settings/presets.py | 22 +++++++++++++------ .../preprocessing/_highly_variable_genes.py | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 docs/release-notes/3653.feature.md diff --git a/docs/release-notes/3653.feature.md b/docs/release-notes/3653.feature.md new file mode 100644 index 0000000000..377f971276 --- /dev/null +++ b/docs/release-notes/3653.feature.md @@ -0,0 +1 @@ +Add {attr}`scanpy.settings.preset` setting with two new presets: {attr}`~scanpy.settings.Preset.SeuratV5` and {attr}`~scanpy.settings.Preset.ScanpyV2Preview`. {smaller}`P Angerer` diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 2e9ef6c489..e9b2ff4298 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -9,15 +9,17 @@ if TYPE_CHECKING: from collections.abc import Callable, Generator, Mapping - from typing import Literal NT = TypeVar("NT", bound=NamedTuple) -__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGFlavor", "Preset"] +__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGPreset", "Preset"] -class HVGFlavor(NamedTuple): - flavor: Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] +HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] + + +class HVGPreset(NamedTuple): + flavor: HVGFlavor class FilterCellsCutoffs(NamedTuple): @@ -100,15 +102,19 @@ class Preset(StrEnum): ScanpyV1 = auto() """Scanpy 1.*’s default settings.""" + ScanpyV2Preview = auto() + """Scanpy 2.*’s feature default settings. 
(Preview: subject to change!)""" + SeuratV5 = auto() """Try to match Seurat 5.* as closely as possible.""" @preset_property - def highly_variable_genes() -> Mapping[Preset, HVGFlavor]: + def highly_variable_genes() -> Mapping[Preset, HVGPreset]: """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" return { - Preset.ScanpyV1: HVGFlavor(flavor="seurat"), - Preset.SeuratV5: HVGFlavor(flavor="seurat_v3"), + Preset.ScanpyV1: HVGPreset(flavor="seurat"), + Preset.ScanpyV2Preview: HVGPreset(flavor="seurat_v3_paper"), + Preset.SeuratV5: HVGPreset(flavor="seurat_v3_paper"), } @preset_property @@ -116,6 +122,7 @@ def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: """Cutoffs for :func:`~scanpy.pp.filter_cells`.""" return { Preset.ScanpyV1: FilterCellsCutoffs(None, None, None, None), + Preset.ScanpyV2Preview: FilterCellsCutoffs(None, None, None, None), Preset.SeuratV5: FilterCellsCutoffs( min_genes=200, min_counts=None, max_genes=None, max_counts=None ), @@ -126,6 +133,7 @@ def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: """Cutoffs for :func:`~scanpy.pp.filter_genes`.""" return { Preset.ScanpyV1: FilterGenesCutoffs(None, None, None, None), + Preset.ScanpyV2Preview: FilterGenesCutoffs(None, None, None, None), Preset.SeuratV5: FilterGenesCutoffs( min_cells=3, min_counts=None, max_cells=None, max_counts=None ), diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index f5eab8ba50..b6e7c21a87 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -650,7 +650,7 @@ def highly_variable_genes( # noqa: PLR0913 if flavor is None: from .. import settings - flavor = settings.preset.highly_variable_genes + flavor = settings.preset.highly_variable_genes.flavor start = logg.info("extracting highly variable genes") From 67af4e0a397951ec06f682b904b4ddcdd7f67f44 Mon Sep 17 00:00:00 2001 From: "Philipp A." 
Date: Fri, 13 Jun 2025 14:37:23 +0200 Subject: [PATCH 34/42] add rank_genes_groups preset --- src/scanpy/_settings/presets.py | 14 ++++++++++++++ src/scanpy/tools/_rank_genes_groups.py | 17 +++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index e9b2ff4298..45cbb47eaf 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -15,6 +15,7 @@ __all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGPreset", "Preset"] +DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] @@ -44,6 +45,10 @@ def n(self) -> int: return sum([i is not None for i in self]) +class RankGenesGroupsPreset(NamedTuple): + method: DETest + + preset_postprocessors: list[Callable[[], None]] = [] @@ -139,6 +144,15 @@ def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: ), } + @preset_property + def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: + """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" + return { + Preset.ScanpyV1: RankGenesGroupsPreset(method="t-test"), + Preset.ScanpyV2Preview: RankGenesGroupsPreset(method="wilcoxon"), + Preset.SeuratV5: RankGenesGroupsPreset(method="wilcoxon"), + } + @contextmanager def override(self, preset: Preset) -> Generator[Preset, None, None]: """Temporarily override :attr:`scanpy.settings.preset`. diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 2a9669a11f..e966bd43fd 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING import numba import numpy as np @@ -13,6 +13,7 @@ from .. import _utils from .. 
import logging as logg from .._compat import CSBase, njit, old_positionals +from .._settings.presets import DETest from .._utils import ( check_nonnegative_integers, get_literal_vals, @@ -22,6 +23,7 @@ if TYPE_CHECKING: from collections.abc import Generator, Iterable + from typing import Literal from anndata import AnnData from numpy.typing import NDArray @@ -29,9 +31,6 @@ _CorrMethod = Literal["benjamini-hochberg", "bonferroni"] -# Used with get_literal_vals -_Method = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] - _CONST_MAX_SIZE = 10000000 @@ -420,7 +419,7 @@ def logreg( def compute_statistics( # noqa: PLR0912 self, - method: _Method, + method: DETest, *, corr_method: _CorrMethod = "benjamini-hochberg", n_genes_user: int | None = None, @@ -517,7 +516,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 pts: bool = False, key_added: str | None = None, copy: bool = False, - method: _Method | None = None, + method: DETest | None = None, corr_method: _CorrMethod = "benjamini-hochberg", tie_correct: bool = False, layer: str | None = None, @@ -630,13 +629,15 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 raise ValueError(msg) if method is None: - method = "t-test" + from scanpy import settings + + method = settings.preset.rank_genes_groups.method if "only_positive" in kwds: rankby_abs = not kwds.pop("only_positive") # backwards compat start = logg.info("ranking genes") - if method not in (avail_methods := get_literal_vals(_Method)): + if method not in (avail_methods := get_literal_vals(DETest)): msg = f"Method must be one of {avail_methods}." 
raise ValueError(msg) From 77279a96a562d7b790cec7ca1441c1067095f6ef Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Sun, 15 Jun 2025 12:30:45 +0200 Subject: [PATCH 35/42] WIP RNG --- src/scanpy/_settings/presets.py | 16 ++++++++++++++++ src/scanpy/preprocessing/_pca/__init__.py | 12 ++++++++++-- src/scanpy/preprocessing/_simple.py | 2 +- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 45cbb47eaf..758943a66a 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -10,6 +10,8 @@ if TYPE_CHECKING: from collections.abc import Callable, Generator, Mapping + from .._utils.random import RNGLike, SeedLike + NT = TypeVar("NT", bound=NamedTuple) __all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGPreset", "Preset"] @@ -45,6 +47,11 @@ def n(self) -> int: return sum([i is not None for i in self]) +class Pca(NamedTuple): + key_added: str | None + rng: RNGLike | SeedLike | None + + class RankGenesGroupsPreset(NamedTuple): method: DETest @@ -144,6 +151,15 @@ def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: ), } + @preset_property + def pca() -> Mapping[Preset, Pca]: + """Settings for :func:`~scanpy.pp.pca`.""" # noqa: D401 + return { + Preset.ScanpyV1: Pca(key_added=None, rng=0), + Preset.ScanpyV2Preview: Pca(key_added="pca", rng=None), + Preset.SeuratV5: Pca(), # TODO + } + @preset_property def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index 3b34845789..eb5a5d71bb 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -28,7 +28,7 @@ from numpy.typing import DTypeLike, NDArray from ..._utils import Empty - from ..._utils.random import _LegacyRandom + from ..._utils.random import RNGLike, SeedLike, _LegacyRandom 
MethodDaskML = type[dmld.PCA | dmld.IncrementalPCA | dmld.TruncatedSVD] MethodSklearn = type[skld.PCA | skld.TruncatedSVD] @@ -68,13 +68,15 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 svd_solver: SvdSolver | None = None, chunked: bool = False, chunk_size: int | None = None, - random_state: _LegacyRandom = 0, + rng: SeedLike | RNGLike | None = None, return_info: bool = False, mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_highly_variable: bool | None = None, dtype: DTypeLike = "float32", key_added: str | None = None, copy: bool = False, + # deprecated + random_state: _LegacyRandom = 0, ) -> AnnData | np.ndarray | CSBase | None: r"""Principal component analysis :cite:p:`Pedregosa2011`. @@ -205,6 +207,12 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 # Current chunking implementation relies on pca being called on X msg = "Cannot use `layer` and `chunked` at the same time." raise NotImplementedError(msg) + if key_added is None: + key_added = settings.preset.pca.key_added + if False: # TODO + if rng is None: + rng = settings.preset.pca.rng + rng = np.random.default_rng(rng) # chunked calculation is not randomized, anyways if svd_solver in {"auto", "randomized"} and not chunked: diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 0d8a9595a4..54f5219fe3 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -873,7 +873,7 @@ def sample( fraction: float | None = None, *, n: int | None = None, - rng: RNGLike | SeedLike | None = 0, + rng: RNGLike | SeedLike | None = None, copy: Literal[False] = False, replace: bool = False, axis: Literal["obs", 0, "var", 1] = "obs", From c2cdf181ea5e715022929de6b8fc15433db0bf4f Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 16 Jun 2025 10:34:49 +0200 Subject: [PATCH 36/42] undo RNG stuff --- src/scanpy/_settings/presets.py | 8 +++----- src/scanpy/preprocessing/_pca/__init__.py | 10 ++-------- 2 files changed, 5 insertions(+), 13 deletions(-) 
diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 758943a66a..fd1a3972c3 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -10,7 +10,6 @@ if TYPE_CHECKING: from collections.abc import Callable, Generator, Mapping - from .._utils.random import RNGLike, SeedLike NT = TypeVar("NT", bound=NamedTuple) @@ -49,7 +48,6 @@ def n(self) -> int: class Pca(NamedTuple): key_added: str | None - rng: RNGLike | SeedLike | None class RankGenesGroupsPreset(NamedTuple): @@ -155,9 +153,9 @@ def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: def pca() -> Mapping[Preset, Pca]: """Settings for :func:`~scanpy.pp.pca`.""" # noqa: D401 return { - Preset.ScanpyV1: Pca(key_added=None, rng=0), - Preset.ScanpyV2Preview: Pca(key_added="pca", rng=None), - Preset.SeuratV5: Pca(), # TODO + Preset.ScanpyV1: Pca(key_added=None), + Preset.ScanpyV2Preview: Pca(key_added="pca"), + Preset.SeuratV5: Pca(key_added="pca"), } @preset_property diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index eb5a5d71bb..b2ac381b7a 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -28,7 +28,7 @@ from numpy.typing import DTypeLike, NDArray from ..._utils import Empty - from ..._utils.random import RNGLike, SeedLike, _LegacyRandom + from ..._utils.random import _LegacyRandom MethodDaskML = type[dmld.PCA | dmld.IncrementalPCA | dmld.TruncatedSVD] MethodSklearn = type[skld.PCA | skld.TruncatedSVD] @@ -68,15 +68,13 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 svd_solver: SvdSolver | None = None, chunked: bool = False, chunk_size: int | None = None, - rng: SeedLike | RNGLike | None = None, + random_state: _LegacyRandom = 0, return_info: bool = False, mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_highly_variable: bool | None = None, dtype: DTypeLike = "float32", key_added: str | None = None, copy: bool = False, - # deprecated - 
random_state: _LegacyRandom = 0, ) -> AnnData | np.ndarray | CSBase | None: r"""Principal component analysis :cite:p:`Pedregosa2011`. @@ -209,10 +207,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 raise NotImplementedError(msg) if key_added is None: key_added = settings.preset.pca.key_added - if False: # TODO - if rng is None: - rng = settings.preset.pca.rng - rng = np.random.default_rng(rng) # chunked calculation is not randomized, anyways if svd_solver in {"auto", "randomized"} and not chunked: From e8678fd8c4f505ddd7283aba43650b4b1d87177e Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 16 Jun 2025 11:52:27 +0200 Subject: [PATCH 37/42] leiden --- src/scanpy/_settings/presets.py | 24 +++++++++++---- src/scanpy/tools/_leiden.py | 52 ++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index fd1a3972c3..0651185b42 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -18,6 +18,7 @@ DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] +LeidenFlavor = Literal["leidenalg", "igraph"] class HVGPreset(NamedTuple): @@ -46,7 +47,7 @@ def n(self) -> int: return sum([i is not None for i in self]) -class Pca(NamedTuple): +class PcaPreset(NamedTuple): key_added: str | None @@ -54,6 +55,10 @@ class RankGenesGroupsPreset(NamedTuple): method: DETest +class LeidenPreset(NamedTuple): + flavor: LeidenFlavor + + preset_postprocessors: list[Callable[[], None]] = [] @@ -150,12 +155,12 @@ def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: } @preset_property - def pca() -> Mapping[Preset, Pca]: + def pca() -> Mapping[Preset, PcaPreset]: """Settings for :func:`~scanpy.pp.pca`.""" # noqa: D401 return { - Preset.ScanpyV1: Pca(key_added=None), - Preset.ScanpyV2Preview: Pca(key_added="pca"), - Preset.SeuratV5: Pca(key_added="pca"), + 
Preset.ScanpyV1: PcaPreset(key_added=None), + Preset.ScanpyV2Preview: PcaPreset(key_added="pca"), + Preset.SeuratV5: PcaPreset(key_added="pca"), } @preset_property @@ -167,6 +172,15 @@ def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: Preset.SeuratV5: RankGenesGroupsPreset(method="wilcoxon"), } + @preset_property + def leiden() -> Mapping[Preset, LeidenPreset]: + """Flavor for :func:`~scanpy.tl.leiden`.""" + return { + Preset.ScanpyV1: LeidenPreset(flavor="leidenalg"), + Preset.ScanpyV2Preview: LeidenPreset(flavor="igraph"), + Preset.SeuratV5: LeidenPreset(flavor="leidenalg"), + } + @contextmanager def override(self, preset: Preset) -> Generator[Preset, None, None]: """Temporarily override :attr:`scanpy.settings.preset`. diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index 5658721609..ecacbcdd88 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -13,11 +13,11 @@ if TYPE_CHECKING: from collections.abc import Sequence - from typing import Literal from anndata import AnnData from .._compat import CSBase + from .._settings.presets import LeidenFlavor from .._utils.random import _LegacyRandom try: # sphinx-autodoc-typehints + optional dependency @@ -43,7 +43,7 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915 neighbors_key: str | None = None, obsp: str | None = None, copy: bool = False, - flavor: Literal["leidenalg", "igraph"] = "leidenalg", + flavor: LeidenFlavor | None = None, **clustering_args, ) -> AnnData | None: """Cluster cells into subgroups :cite:p:`Traag2019`. @@ -118,28 +118,34 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915 and `n_iterations`. """ - if flavor not in {"igraph", "leidenalg"}: - msg = ( - f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed" - ) - raise ValueError(msg) - _utils.ensure_igraph() - if flavor == "igraph": - if directed: - msg = "Cannot use igraph’s leiden implementation with a directed graph." 
- raise ValueError(msg) - if partition_type is not None: - msg = "Do not pass in partition_type argument when using igraph." - raise ValueError(msg) - else: - try: - import leidenalg - - msg = 'In the future, the default backend for leiden will be igraph instead of leidenalg.\n\n To achieve the future defaults please pass: flavor="igraph" and n_iterations=2. directed must also be False to work with igraph\'s implementation.' + if flavor is None: + from scanpy import settings + + flavor = settings.preset.leiden.flavor + match flavor: + case "igraph": + _utils.ensure_igraph() + if directed: + msg = "Cannot use igraph’s leiden implementation with a directed graph." + raise ValueError(msg) + if partition_type is not None: + msg = "Do not pass in partition_type argument when using igraph." + raise ValueError(msg) + case "leidenalg": + try: + import leidenalg + except ImportError as e: + msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." + raise ImportError(msg) from e + msg = ( + "In the future, the default backend for leiden will be igraph instead of leidenalg.\n\n" + 'To achieve the future defaults please pass: `flavor="igraph"` and n_iterations=2. ' + "directed must also be False to work with igraph's implementation." + ) _utils.warn_once(msg, FutureWarning, stacklevel=3) - except ImportError as e: - msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." - raise ImportError(msg) from e + case _: + msg = f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed." 
+ raise ValueError(msg) clustering_args = dict(clustering_args) start = logg.info("running Leiden clustering") From 6bfc3323f2c2bb4e47f80cdb0bc8389f65381aa8 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 16 Jun 2025 11:56:25 +0200 Subject: [PATCH 38/42] oops --- src/scanpy/tools/_leiden.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index ecacbcdd88..ca2b958597 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -122,9 +122,9 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915 from scanpy import settings flavor = settings.preset.leiden.flavor + _utils.ensure_igraph() # we need igraph regardless of `flavor` match flavor: case "igraph": - _utils.ensure_igraph() if directed: msg = "Cannot use igraph’s leiden implementation with a directed graph." raise ValueError(msg) From 10757faad93a2749fd77b51073723ec333050324 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 16 Jun 2025 13:55:47 +0200 Subject: [PATCH 39/42] add return_df --- src/scanpy/_settings/presets.py | 7 +- .../_deprecated/highly_variable_genes.py | 94 +++++++++---------- src/scanpy/tools/_umap.py | 3 +- 3 files changed, 49 insertions(+), 55 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 0651185b42..44d88b5fa0 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -23,6 +23,7 @@ class HVGPreset(NamedTuple): flavor: HVGFlavor + return_df: bool class FilterCellsCutoffs(NamedTuple): @@ -127,9 +128,9 @@ class Preset(StrEnum): def highly_variable_genes() -> Mapping[Preset, HVGPreset]: """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" return { - Preset.ScanpyV1: HVGPreset(flavor="seurat"), - Preset.ScanpyV2Preview: HVGPreset(flavor="seurat_v3_paper"), - Preset.SeuratV5: HVGPreset(flavor="seurat_v3_paper"), + Preset.ScanpyV1: HVGPreset(flavor="seurat", return_df=False), + Preset.ScanpyV2Preview: 
HVGPreset(flavor="seurat_v3_paper", return_df=True), + Preset.SeuratV5: HVGPreset(flavor="seurat_v3_paper", return_df=True), } @preset_property diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py index 87bf963ca5..270f1ca1e3 100644 --- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py +++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py @@ -42,7 +42,8 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 log: bool = True, subset: bool = True, copy: bool = False, -) -> AnnData | np.recarray | None: + return_df: bool | None = None, +) -> AnnData | pd.DataFrame | np.recarray | None: """Extract highly variable genes :cite:p:`Satija2015,Zheng2017`. .. deprecated:: 1.3.6 @@ -117,8 +118,8 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 **dispersions_norm** : adata.var Normalized dispersions per gene. Logarithmized when `log` is `True`. - If a data matrix `X` is passed, the annotation is returned as `np.recarray` - with the same information stored in fields: `gene_subset`, `means`, `dispersions`, `dispersion_norm`. + If a data matrix `X` is passed, the annotation is returned as `pd.DataFrame` (if `return_df=True`) or `np.recarray` + with the same information stored in columns: `gene_subset`, `means`, `dispersions`, `dispersions_norm`. """ if n_top_genes is not None and not all( @@ -126,6 +127,10 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 ): msg = "If you pass `n_top_genes`, all cutoffs are ignored." 
warnings.warn(msg, UserWarning, stacklevel=2) + if return_df is None: + from scanpy import settings + + return_df = settings.preset.highly_variable_genes.return_df if min_disp is None: min_disp = 0.5 if min_mean is None: @@ -143,10 +148,10 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 max_mean=max_mean, n_top_genes=n_top_genes, flavor=flavor, - ) - adata.var["means"] = result["means"] - adata.var["dispersions"] = result["dispersions"] - adata.var["dispersions_norm"] = result["dispersions_norm"] + return_df=True, + ).set_index(adata.var_names) + assert isinstance(result, pd.DataFrame) + adata.var[cols] = result[cols := ["means", "dispersions", "dispersions_norm"]] if subset: adata._inplace_subset_var(result["gene_subset"]) else: @@ -154,21 +159,21 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 return adata if copy else None start = logg.info("extracting highly variable genes") X = data # no copy necessary, X remains unchanged in the following - mean, var = materialize_as_ndarray(mean_var(X, axis=0, correction=1)) + means, vars = materialize_as_ndarray(mean_var(X, axis=0, correction=1)) # now actually compute the dispersion - mean[mean == 0] = 1e-12 # set entries equal to zero to small value - dispersion = var / mean + means[means == 0] = 1e-12 # set entries equal to zero to small value + dispersions = vars / means if log: # logarithmized mean as in Seurat - dispersion[dispersion == 0] = np.nan - dispersion = np.log(dispersion) - mean = np.log1p(mean) + dispersions[dispersions == 0] = np.nan + dispersions = np.log(dispersions) + means = np.log1p(means) # all of the following quantities are "per-gene" here df = pd.DataFrame() - df["mean"] = mean - df["dispersion"] = dispersion + df["means"] = means + df["dispersions"] = dispersions if flavor == "seurat": - df["mean_bin"] = pd.cut(df["mean"], bins=n_bins) - disp_grouped = df.groupby("mean_bin", observed=True)["dispersion"] + df["mean_bin"] = pd.cut(df["means"], bins=n_bins) + 
disp_grouped = df.groupby("mean_bin", observed=True)["dispersions"] disp_mean_bin = disp_grouped.mean() disp_std_bin = disp_grouped.std(ddof=1) # retrieve those genes that have nan std, these are the ones where @@ -187,70 +192,59 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 disp_std_bin[one_gene_per_bin] = disp_mean_bin[one_gene_per_bin.values].values disp_mean_bin[one_gene_per_bin] = 0 # actually do the normalization - df["dispersion_norm"] = ( + df["dispersions_norm"] = ( # use values here as index differs - df["dispersion"].values - disp_mean_bin[df["mean_bin"].values].values + df["dispersions"].values - disp_mean_bin[df["mean_bin"].values].values ) / disp_std_bin[df["mean_bin"].values].values elif flavor == "cell_ranger": from statsmodels import robust df["mean_bin"] = pd.cut( - df["mean"], - np.r_[-np.inf, np.percentile(df["mean"], np.arange(10, 105, 5)), np.inf], + df["means"], + np.r_[-np.inf, np.percentile(df["means"], np.arange(10, 105, 5)), np.inf], ) - disp_grouped = df.groupby("mean_bin", observed=True)["dispersion"] + disp_grouped = df.groupby("mean_bin", observed=True)["dispersions"] disp_median_bin = disp_grouped.median() # the next line raises the warning: "Mean of empty slice" with warnings.catch_warnings(): warnings.simplefilter("ignore") disp_mad_bin = disp_grouped.apply(robust.mad) - df["dispersion_norm"] = ( + df["dispersions_norm"] = ( np.abs( - df["dispersion"].values - disp_median_bin[df["mean_bin"].values].values + df["dispersions"].values - disp_median_bin[df["mean_bin"].values].values ) / disp_mad_bin[df["mean_bin"].values].values ) else: msg = '`flavor` needs to be "seurat" or "cell_ranger"' raise ValueError(msg) - dispersion_norm = df["dispersion_norm"].values.astype("float32") + dispersions_norm = df["dispersions_norm"].values.astype("float32") if n_top_genes is not None: - dispersion_norm = dispersion_norm[~np.isnan(dispersion_norm)] - dispersion_norm[ - ::-1 - ].sort() # interestingly, np.argpartition is slightly 
slower - disp_cut_off = dispersion_norm[n_top_genes - 1] - gene_subset = df["dispersion_norm"].values >= disp_cut_off + dispersions_norm = dispersions_norm[~np.isnan(dispersions_norm)] + # interestingly, np.argpartition is slightly slower than this: + dispersions_norm[::-1].sort() + disp_cut_off = dispersions_norm[n_top_genes - 1] + gene_subset = df["dispersions_norm"].values >= disp_cut_off logg.debug( f"the {n_top_genes} top genes correspond to a " f"normalized dispersion cutoff of {disp_cut_off}" ) else: max_disp = np.inf if max_disp is None else max_disp - dispersion_norm[np.isnan(dispersion_norm)] = 0 # similar to Seurat + dispersions_norm[np.isnan(dispersions_norm)] = 0 # similar to Seurat gene_subset = np.logical_and.reduce( ( - mean > min_mean, - mean < max_mean, - dispersion_norm > min_disp, - dispersion_norm < max_disp, + means > min_mean, + means < max_mean, + dispersions_norm > min_disp, + dispersions_norm < max_disp, ) ) + df["gene_subset"] = gene_subset + df["dispersions_norm"] = df["dispersions_norm"].astype("float32") logg.info(" finished", time=start) - return np.rec.fromarrays( - ( - gene_subset, - df["mean"].values, - df["dispersion"].values, - df["dispersion_norm"].values.astype("float32", copy=False), - ), - dtype=[ - ("gene_subset", bool), - ("means", "float32"), - ("dispersions", "float32"), - ("dispersions_norm", "float32"), - ], - ) + rv = df[["gene_subset", "means", "dispersions", "dispersions_norm"]] + return rv if return_df else rv.to_records(index=False) def filter_genes_cv_deprecated(X, Ecutoff, cvFilter): diff --git a/src/scanpy/tools/_umap.py b/src/scanpy/tools/_umap.py index 7803616924..bf95995210 100644 --- a/src/scanpy/tools/_umap.py +++ b/src/scanpy/tools/_umap.py @@ -233,8 +233,7 @@ def umap( # noqa: PLR0913, PLR0915 ) elif method == "rapids": msg = ( - "`method='rapids'` is deprecated. " - "Use `rapids_singlecell.tl.louvain` instead." + "`method='rapids'` is deprecated. Use `rapids_singlecell.tl.umap` instead." 
) warnings.warn(msg, FutureWarning, stacklevel=2) metric = neigh_params.get("metric", "euclidean") From 6166fec2cb308d57ec80b86bce97e6619c527d26 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 16 Jun 2025 17:09:15 +0200 Subject: [PATCH 40/42] oops --- src/scanpy/_settings/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/_settings/__init__.py b/src/scanpy/_settings/__init__.py index ef22a44210..6422da0fe3 100644 --- a/src/scanpy/_settings/__init__.py +++ b/src/scanpy/_settings/__init__.py @@ -346,7 +346,7 @@ def categories_to_ignore(cls, categories_to_ignore: Iterable[str]) -> None: @deprecated("Use `scanpy.set_figure_params` instead") def set_figure_params(cls, *args, **kwargs) -> None: - cls.set_figure_params(*args, **kwargs) + cls._set_figure_params(*args, **kwargs) @old_positionals( "scanpy", From 76a19c8819e3191400db2077dd2ce760edd43f4b Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 17 Jun 2025 16:59:15 +0200 Subject: [PATCH 41/42] revert cutoffs --- src/scanpy/_settings/presets.py | 55 +++------------- src/scanpy/preprocessing/_recipes.py | 27 ++++---- src/scanpy/preprocessing/_simple.py | 99 +++++++++++----------------- 3 files changed, 61 insertions(+), 120 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 44d88b5fa0..95081d3421 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -13,7 +13,16 @@ NT = TypeVar("NT", bound=NamedTuple) -__all__ = ["FilterCellsCutoffs", "FilterGenesCutoffs", "HVGPreset", "Preset"] +__all__ = [ + "DETest", + "HVGFlavor", + "HVGPreset", + "LeidenFlavor", + "LeidenPreset", + "PcaPreset", + "Preset", + "RankGenesGroupsPreset", +] DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] @@ -26,28 +35,6 @@ class HVGPreset(NamedTuple): return_df: bool -class FilterCellsCutoffs(NamedTuple): - min_genes: int | None = None - min_counts: int | None = None - max_genes: int | None = None - 
max_counts: int | None = None - - @property - def n(self) -> int: - return sum([i is not None for i in self]) - - -class FilterGenesCutoffs(NamedTuple): - min_cells: int | None = None - min_counts: int | None = None - max_cells: int | None = None - max_counts: int | None = None - - @property - def n(self) -> int: - return sum([i is not None for i in self]) - - class PcaPreset(NamedTuple): key_added: str | None @@ -133,28 +120,6 @@ def highly_variable_genes() -> Mapping[Preset, HVGPreset]: Preset.SeuratV5: HVGPreset(flavor="seurat_v3_paper", return_df=True), } - @preset_property - def filter_cells() -> Mapping[Preset, FilterCellsCutoffs]: - """Cutoffs for :func:`~scanpy.pp.filter_cells`.""" - return { - Preset.ScanpyV1: FilterCellsCutoffs(None, None, None, None), - Preset.ScanpyV2Preview: FilterCellsCutoffs(None, None, None, None), - Preset.SeuratV5: FilterCellsCutoffs( - min_genes=200, min_counts=None, max_genes=None, max_counts=None - ), - } - - @preset_property - def filter_genes() -> Mapping[Preset, FilterGenesCutoffs]: - """Cutoffs for :func:`~scanpy.pp.filter_genes`.""" - return { - Preset.ScanpyV1: FilterGenesCutoffs(None, None, None, None), - Preset.ScanpyV2Preview: FilterGenesCutoffs(None, None, None, None), - Preset.SeuratV5: FilterGenesCutoffs( - min_cells=3, min_counts=None, max_cells=None, max_counts=None - ), - } - @preset_property def pca() -> Mapping[Preset, PcaPreset]: """Settings for :func:`~scanpy.pp.pca`.""" # noqa: D401 diff --git a/src/scanpy/preprocessing/_recipes.py b/src/scanpy/preprocessing/_recipes.py index 94946f51a3..e55296851f 100644 --- a/src/scanpy/preprocessing/_recipes.py +++ b/src/scanpy/preprocessing/_recipes.py @@ -98,24 +98,23 @@ def recipe_seurat( Return a copy if true. """ - from .. import Preset, pl, pp, settings + from .. 
import pl, pp from ._deprecated.highly_variable_genes import filter_genes_dispersion if copy: adata = adata.copy() - - with settings.preset.override(Preset.SeuratV5): - pp.filter_cells(adata) - pp.filter_genes(adata) - pp.normalize_total(adata, target_sum=1e4) - filter_result = filter_genes_dispersion(adata.X, log=not log) - if plot: - pl.filter_genes_dispersion(filter_result, log=not log) - adata._inplace_subset_var(filter_result.gene_subset) # filter genes - if log: - pp.log1p(adata) - pp.scale(adata, max_value=10) - + pp.filter_cells(adata, min_genes=200) + pp.filter_genes(adata, min_cells=3) + pp.normalize_total(adata, target_sum=1e4) + filter_result = filter_genes_dispersion( + adata.X, min_mean=0.0125, max_mean=3, min_disp=0.5, log=not log + ) + if plot: + pl.filter_genes_dispersion(filter_result, log=not log) + adata._inplace_subset_var(filter_result["gene_subset"]) # filter genes + if log: + pp.log1p(adata) + pp.scale(adata, max_value=10) return adata if copy else None diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 54f5219fe3..6dc39e5087 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -21,7 +21,6 @@ from .. import logging as logg from .._compat import CSBase, CSRBase, DaskArray, deprecated, njit, old_positionals from .._settings import settings -from .._settings.presets import FilterCellsCutoffs, FilterGenesCutoffs from .._utils import ( _check_array_function_arguments, _resolve_axis, @@ -72,8 +71,7 @@ def filter_cells( `min_genes` genes expressed. This is to filter measurement outliers, i.e. “unreliable” observations. - Unless you use a :attr:`scanpy.settings.preset`, - only provide one of the optional parameters `min_counts`, `min_genes`, + Only provide one of the optional parameters `min_counts`, `min_genes`, `max_counts`, `max_genes` per call. 
Parameters @@ -140,20 +138,12 @@ def filter_cells( """ if copy: logg.warning("`copy` is deprecated, use `inplace` instead.") - cutoffs = FilterCellsCutoffs( - min_counts=min_counts, - min_genes=min_genes, - max_counts=max_counts, - max_genes=max_genes, + n_given_options = sum( + option is not None for option in [min_genes, min_counts, max_genes, max_counts] ) - del min_counts, min_genes, max_counts, max_genes - if cutoffs.n == 0: - from .. import settings - - cutoffs = settings.preset.filter_cells - if cutoffs.n != 1: + if n_given_options != 1: msg = ( - "Only provide one of the optional parameters `min_counts`, " + "Provide exactly one of the optional parameters `min_counts`, " "`min_genes`, `max_counts`, `max_genes` per call." ) raise ValueError(msg) @@ -163,25 +153,25 @@ def filter_cells( cell_subset, number = materialize_as_ndarray( filter_cells( adata.X, - min_counts=cutoffs.min_counts, - min_genes=cutoffs.min_genes, - max_counts=cutoffs.max_counts, - max_genes=cutoffs.max_genes, + min_counts=min_counts, + min_genes=min_genes, + max_counts=max_counts, + max_genes=max_genes, ), ) if not inplace: return cell_subset, number - if cutoffs.min_genes is None and cutoffs.max_genes is None: + if min_genes is None and max_genes is None: adata.obs["n_counts"] = number else: adata.obs["n_genes"] = number adata._inplace_subset_obs(cell_subset) return adata if copy else None X = data # proceed with processing the data matrix - min_number = cutoffs.min_counts if cutoffs.min_genes is None else cutoffs.min_genes - max_number = cutoffs.max_counts if cutoffs.max_genes is None else cutoffs.max_genes + min_number = min_counts if min_genes is None else min_genes + max_number = max_counts if max_genes is None else max_genes number_per_cell = stats.sum( - X if cutoffs.min_genes is None and cutoffs.max_genes is None else X > 0, axis=1 + X if min_genes is None and max_genes is None else X > 0, axis=1 ) if min_number is not None: cell_subset = number_per_cell >= min_number @@ -191,19 
+181,19 @@ def filter_cells( s = stats.sum(~cell_subset) if s > 0: msg = f"filtered out {s} cells that have " - if cutoffs.min_genes is not None or cutoffs.min_counts is not None: + if min_genes is not None or min_counts is not None: msg += "less than " msg += ( - f"{cutoffs.min_genes} genes expressed" - if cutoffs.min_counts is None - else f"{cutoffs.min_counts} counts" + f"{min_genes} genes expressed" + if min_counts is None + else f"{min_counts} counts" ) - if cutoffs.max_genes is not None or cutoffs.max_counts is not None: + if max_genes is not None or max_counts is not None: msg += "more than " msg += ( - f"{cutoffs.max_genes} genes expressed" - if cutoffs.max_counts is None - else f"{cutoffs.max_counts} counts" + f"{max_genes} genes expressed" + if max_counts is None + else f"{max_counts} counts" ) logg.info(msg) return cell_subset, number_per_cell @@ -228,8 +218,7 @@ def filter_genes( least `min_cells` cells or have at most `max_counts` counts or are expressed in at most `max_cells` cells. - Unless you use a :attr:`scanpy.settings.preset`, - only provide one of the optional parameters `min_counts`, `min_cells`, + Only provide one of the optional parameters `min_counts`, `min_cells`, `max_counts`, `max_cells` per call. Parameters @@ -263,20 +252,12 @@ def filter_genes( """ if copy: logg.warning("`copy` is deprecated, use `inplace` instead.") - cutoffs = FilterGenesCutoffs( - min_counts=min_counts, - min_cells=min_cells, - max_counts=max_counts, - max_cells=max_cells, + n_given_options = sum( + option is not None for option in [min_cells, min_counts, max_cells, max_counts] ) - del min_counts, min_cells, max_counts, max_cells - if cutoffs.n == 0: - from .. import settings - - cutoffs = settings.preset.filter_genes - if cutoffs.n != 1: + if n_given_options != 1: msg = ( - "Only provide one of the optional parameters `min_counts`, " + "Provide exactly one of the optional parameters `min_counts`, " "`min_cells`, `max_counts`, `max_cells` per call." 
) raise ValueError(msg) @@ -287,15 +268,15 @@ def filter_genes( gene_subset, number = materialize_as_ndarray( filter_genes( adata.X, - min_cells=cutoffs.min_cells, - min_counts=cutoffs.min_counts, - max_cells=cutoffs.max_cells, - max_counts=cutoffs.max_counts, + min_cells=min_cells, + min_counts=min_counts, + max_cells=max_cells, + max_counts=max_counts, ) ) if not inplace: return gene_subset, number - if cutoffs.min_cells is None and cutoffs.max_cells is None: + if min_cells is None and max_cells is None: adata.var["n_counts"] = number else: adata.var["n_cells"] = number @@ -303,10 +284,10 @@ def filter_genes( return adata if copy else None X = data # proceed with processing the data matrix - min_number = cutoffs.min_counts if cutoffs.min_cells is None else cutoffs.min_cells - max_number = cutoffs.max_counts if cutoffs.max_cells is None else cutoffs.max_cells + min_number = min_counts if min_cells is None else min_cells + max_number = max_counts if max_cells is None else max_cells number_per_gene = stats.sum( - X if cutoffs.min_cells is None and cutoffs.max_cells is None else X > 0, axis=0 + X if min_cells is None and max_cells is None else X > 0, axis=0 ) if min_number is not None: gene_subset = number_per_gene >= min_number @@ -316,19 +297,15 @@ def filter_genes( s = stats.sum(~gene_subset) if s > 0: msg = f"filtered out {s} genes that are detected " - if cutoffs.min_cells is not None or cutoffs.min_counts is not None: + if min_cells is not None or min_counts is not None: msg += "in less than " msg += ( - f"{cutoffs.min_cells} cells" - if cutoffs.min_counts is None - else f"{cutoffs.min_counts} counts" + f"{min_cells} cells" if min_counts is None else f"{min_counts} counts" ) - if cutoffs.max_cells is not None or cutoffs.max_counts is not None: + if max_cells is not None or max_counts is not None: msg += "in more than " msg += ( - f"{cutoffs.max_cells} cells" - if cutoffs.max_counts is None - else f"{cutoffs.max_counts} counts" + f"{max_cells} cells" if 
max_counts is None else f"{max_counts} counts" ) logg.info(msg) return gene_subset, number_per_gene From 80c24a91704a387caa530fbc86303555bdfdc901 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 17 Jun 2025 17:11:56 +0200 Subject: [PATCH 42/42] mask rank_genes_groups --- src/scanpy/_settings/presets.py | 11 ++++++++--- src/scanpy/preprocessing/_pca/__init__.py | 4 ++-- src/scanpy/tools/_rank_genes_groups.py | 17 +++++++++++------ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 95081d3421..83188a9817 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -41,6 +41,7 @@ class PcaPreset(NamedTuple): class RankGenesGroupsPreset(NamedTuple): method: DETest + mask_var: str | None class LeidenPreset(NamedTuple): @@ -133,9 +134,13 @@ def pca() -> Mapping[Preset, PcaPreset]: def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" return { - Preset.ScanpyV1: RankGenesGroupsPreset(method="t-test"), - Preset.ScanpyV2Preview: RankGenesGroupsPreset(method="wilcoxon"), - Preset.SeuratV5: RankGenesGroupsPreset(method="wilcoxon"), + Preset.ScanpyV1: RankGenesGroupsPreset(method="t-test", mask_var=None), + Preset.ScanpyV2Preview: RankGenesGroupsPreset( + method="wilcoxon", mask_var=None + ), + Preset.SeuratV5: RankGenesGroupsPreset( + method="wilcoxon", mask_var="highly_variable" + ), } @preset_property diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index b2ac381b7a..90caa5cf3c 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -73,7 +73,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_highly_variable: bool | None = None, dtype: DTypeLike = "float32", - key_added: str | None = None, + key_added: str | None | Empty = 
_empty, copy: bool = False, ) -> AnnData | np.ndarray | CSBase | None: r"""Principal component analysis :cite:p:`Pedregosa2011`. @@ -205,7 +205,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 # Current chunking implementation relies on pca being called on X msg = "Cannot use `layer` and `chunked` at the same time." raise NotImplementedError(msg) - if key_added is None: + if key_added is _empty: key_added = settings.preset.pca.key_added # chunked calculation is not randomized, anyways diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index e966bd43fd..1054b27bf8 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -15,6 +15,7 @@ from .._compat import CSBase, njit, old_positionals from .._settings.presets import DETest from .._utils import ( + _empty, check_nonnegative_integers, get_literal_vals, raise_not_implemented_error_if_backed_type, @@ -28,6 +29,8 @@ from anndata import AnnData from numpy.typing import NDArray + from .._utils import Empty + _CorrMethod = Literal["benjamini-hochberg", "bonferroni"] @@ -507,7 +510,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 adata: AnnData, groupby: str, *, - mask_var: NDArray[np.bool_] | str | None = None, + mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_raw: bool | None = None, groups: Literal["all"] | Iterable[str] = "all", reference: str = "rest", @@ -620,6 +623,13 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 >>> sc.pl.rank_genes_groups(adata) """ + from scanpy import settings + + if mask_var is _empty: + mask_var = settings.preset.rank_genes_groups.mask_var + if method is None: + method = settings.preset.rank_genes_groups.method + mask_var = _check_mask(adata, mask_var, "var") if use_raw is None: @@ -628,11 +638,6 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 msg = "Received `use_raw=True`, but `adata.raw` is empty." 
raise ValueError(msg) - if method is None: - from scanpy import settings - - method = settings.preset.rank_genes_groups.method - if "only_positive" in kwds: rankby_abs = not kwds.pop("only_positive") # backwards compat