diff --git a/docs/api/settings.md b/docs/api/settings.md index acec01851a..038ea7c500 100644 --- a/docs/api/settings.md +++ b/docs/api/settings.md @@ -30,6 +30,16 @@ An object that allows configuring Scanpy. Some selected settings are discussed in the following. +Presets allow to set the behavior of many scanpy functions at once: + +```{eval-rst} +.. autosummary:: + :signatures: none + :toctree: ../generated/ + + Preset +``` + Verbosity controls the amount of logging output: ```{eval-rst} diff --git a/docs/conf.py b/docs/conf.py index 62b95bdee2..009f0d4853 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -219,6 +219,9 @@ def setup(app: Sphinx): "scanpy.plotting._matrixplot.MatrixPlot": "scanpy.pl.MatrixPlot", "scanpy.plotting._dotplot.DotPlot": "scanpy.pl.DotPlot", "scanpy.plotting._stacked_violin.StackedViolin": "scanpy.pl.StackedViolin", + "scanpy._param_sets.HVGFlavor": "tuple", + "scanpy._param_sets.FilterCellsCutoffs": "tuple", + "scanpy._param_sets.FilterGenesCutoffs": "tuple", "pandas.core.series.Series": "pandas.Series", "numpy.bool_": "numpy.bool", # Since numpy 2, numpy.bool is the canonical dtype } diff --git a/docs/release-notes/3653.feature.md b/docs/release-notes/3653.feature.md new file mode 100644 index 0000000000..377f971276 --- /dev/null +++ b/docs/release-notes/3653.feature.md @@ -0,0 +1 @@ +Add {attr}`scanpy.settings.preset` setting with two new presets: {attr}`~scanpy.settings.Preset.SeuratV5` and {attr}`~scanpy.settings.Preset.ScanpyV2Preview`. {smaller}`P Angerer` diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index 47bffe370b..1480d7e01e 100644 --- a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -15,7 +15,7 @@ # the actual API # (start with settings as several tools are using it) -from ._settings import Verbosity, settings +from ._settings import Preset, Verbosity, settings set_figure_params = settings._set_figure_params @@ -62,6 +62,7 @@ __all__ = [ "AnnData", "Neighbors", + "Preset", "Verbosity", "__version__", "concat", diff --git a/src/scanpy/_settings/__init__.py b/src/scanpy/_settings/__init__.py index fab212455f..6422da0fe3 100644 --- a/src/scanpy/_settings/__init__.py +++ b/src/scanpy/_settings/__init__.py @@ -11,6 +11,7 @@ from .._compat import deprecated, old_positionals from .._singleton import SingletonMeta from ..logging import _RootLogger, _set_log_file, _set_log_level +from .presets import Preset from .verbosity import Verbosity if TYPE_CHECKING: @@ -27,7 +28,6 @@ | Literal["raw", "rgba"] ) - S = TypeVar("S") T = TypeVar("T") P = ParamSpec("P") @@ -67,6 +67,7 @@ def wrapped(self: S, var: T, *args: P.args, **kwargs: P.kwargs) -> R: class SettingsMeta(SingletonMeta): + _preset: Preset # logging _root_logger: _RootLogger _logfile: TextIO | None @@ -99,6 +100,15 @@ class SettingsMeta(SingletonMeta): _previous_memory_usage: int """Stores the previous memory usage.""" + @property + def preset(cls) -> Preset: + """Preset to use.""" + return cls._preset + + @preset.setter + def preset(cls, preset: Preset | str) -> None: + cls._preset = Preset(preset) + @property def verbosity(cls) -> Verbosity: """Verbosity level (default :attr:`Verbosity.warning`).""" @@ -336,7 +346,7 @@ def categories_to_ignore(cls, categories_to_ignore: Iterable[str]) -> None: @deprecated("Use `scanpy.set_figure_params` instead") def set_figure_params(cls, *args, **kwargs) -> None: - cls.set_figure_params(*args, **kwargs) + cls._set_figure_params(*args, **kwargs) @old_positionals( "scanpy", @@ -455,6 +465,7 @@ class settings(metaclass=SettingsMeta): def __new__(cls) 
-> type[Self]: return cls + _preset = Preset.ScanpyV1 # logging _root_logger: ClassVar = _RootLogger(logging.INFO) _logfile: ClassVar = None diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py new file mode 100644 index 0000000000..83188a9817 --- /dev/null +++ b/src/scanpy/_settings/presets.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +import inspect +import re +from contextlib import contextmanager +from enum import StrEnum, auto +from functools import cached_property, partial, wraps +from typing import TYPE_CHECKING, Literal, NamedTuple, TypeVar + +if TYPE_CHECKING: + from collections.abc import Callable, Generator, Mapping + + +NT = TypeVar("NT", bound=NamedTuple) + +__all__ = [ + "DETest", + "HVGFlavor", + "HVGPreset", + "LeidenFlavor", + "LeidenPreset", + "PcaPreset", + "Preset", + "RankGenesGroupsPreset", +] + + +DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] +HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] +LeidenFlavor = Literal["leidenalg", "igraph"] + + +class HVGPreset(NamedTuple): + flavor: HVGFlavor + return_df: bool + + +class PcaPreset(NamedTuple): + key_added: str | None + + +class RankGenesGroupsPreset(NamedTuple): + method: DETest + mask_var: str | None + + +class LeidenPreset(NamedTuple): + flavor: LeidenFlavor + + +preset_postprocessors: list[Callable[[], None]] = [] + + +def named_tuple_non_defaults( + nt: NamedTuple, +) -> Generator[tuple[str, object], None, None]: + cls = type(nt) + for param in cls._fields: + value = getattr(nt, param) + if param not in cls._field_defaults or value != cls._field_defaults[param]: + yield param, value + + +def postprocess_preset_prop( + prop: cached_property[NT], get_map: Callable[[], Mapping[Preset, NT]] +) -> None: + map = get_map() + + map_type = inspect.signature(get_map).return_annotation + m = re.fullmatch(r"Mapping\[Preset, (.*)\]", map_type) + assert m is not None + value_type = m[1] + + added_doc = "\n".join( + ":attr:`{name}`\n Defaults: {defaults}".format( + name=k.name, + defaults=", ".join( + f"`{param}={default!r}`" + for param, default in named_tuple_non_defaults(params) + ) + or "none", + ) + for k, params in map.items() + ) + + prop.__doc__ = f"{prop.__doc__}\n\n{added_doc}" + prop.func.__annotations__["return"] = value_type + + +def preset_property(get_map: Callable[[], Mapping[Preset, NT]]) -> cached_property[NT]: + @wraps(get_map) + def get(self: Preset) -> NT: + return get_map()[self] + + prop = cached_property(get) + preset_postprocessors.append(partial(postprocess_preset_prop, prop, get_map)) + return prop + + +class Preset(StrEnum): + """Presets for :attr:`scanpy.settings.preset`. + + See properties below for details. + """ + + ScanpyV1 = auto() + """Scanpy 1.*’s default settings.""" + + ScanpyV2Preview = auto() + """Scanpy 2.*’s feature default settings. 
(Preview: subject to change!)""" + + SeuratV5 = auto() + """Try to match Seurat 5.* as closely as possible.""" + + @preset_property + def highly_variable_genes() -> Mapping[Preset, HVGPreset]: + """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" + return { + Preset.ScanpyV1: HVGPreset(flavor="seurat", return_df=False), + Preset.ScanpyV2Preview: HVGPreset(flavor="seurat_v3_paper", return_df=True), + Preset.SeuratV5: HVGPreset(flavor="seurat_v3_paper", return_df=True), + } + + @preset_property + def pca() -> Mapping[Preset, PcaPreset]: + """Settings for :func:`~scanpy.pp.pca`.""" # noqa: D401 + return { + Preset.ScanpyV1: PcaPreset(key_added=None), + Preset.ScanpyV2Preview: PcaPreset(key_added="pca"), + Preset.SeuratV5: PcaPreset(key_added="pca"), + } + + @preset_property + def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: + """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" + return { + Preset.ScanpyV1: RankGenesGroupsPreset(method="t-test", mask_var=None), + Preset.ScanpyV2Preview: RankGenesGroupsPreset( + method="wilcoxon", mask_var=None + ), + Preset.SeuratV5: RankGenesGroupsPreset( + method="wilcoxon", mask_var="highly_variable" + ), + } + + @preset_property + def leiden() -> Mapping[Preset, LeidenPreset]: + """Flavor for :func:`~scanpy.tl.leiden`.""" + return { + Preset.ScanpyV1: LeidenPreset(flavor="leidenalg"), + Preset.ScanpyV2Preview: LeidenPreset(flavor="igraph"), + Preset.SeuratV5: LeidenPreset(flavor="leidenalg"), + } + + @contextmanager + def override(self, preset: Preset) -> Generator[Preset, None, None]: + """Temporarily override :attr:`scanpy.settings.preset`. + + >>> import scanpy as sc + >>> sc.settings.preset = sc.Preset.ScanpyV1 + >>> with sc.settings.preset.override(sc.Preset.SeuratV5): + ... sc.settings.preset + + >>> sc.settings.preset + + """ + from scanpy import settings + + settings.preset = preset + try: + yield self + finally: + settings.preset = self + + +for postprocess in preset_postprocessors: + postprocess() diff --git a/src/scanpy/_types.py b/src/scanpy/_types.py deleted file mode 100644 index af2d430fe4..0000000000 --- a/src/scanpy/_types.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import annotations - -from typing import Literal - -__all__ = ["HVGFlavor"] - -HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py index 87bf963ca5..270f1ca1e3 100644 --- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py +++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py @@ -42,7 +42,8 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 log: bool = True, subset: bool = True, copy: bool = False, -) -> AnnData | np.recarray | None: + return_df: bool | None = None, +) -> AnnData | pd.DataFrame | np.recarray | None: """Extract highly variable genes :cite:p:`Satija2015,Zheng2017`. .. deprecated:: 1.3.6 @@ -117,8 +118,8 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 **dispersions_norm** : adata.var Normalized dispersions per gene. Logarithmized when `log` is `True`. - If a data matrix `X` is passed, the annotation is returned as `np.recarray` - with the same information stored in fields: `gene_subset`, `means`, `dispersions`, `dispersion_norm`. 
+ If a data matrix `X` is passed, the annotation is returned as `pd.DataFrame` (if `return_df=True`) or `np.recarray` + with the same information stored in columns: `gene_subset`, `means`, `dispersions`, `dispersions_norm`. """ if n_top_genes is not None and not all( @@ -126,6 +127,10 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 ): msg = "If you pass `n_top_genes`, all cutoffs are ignored." warnings.warn(msg, UserWarning, stacklevel=2) + if return_df is None: + from scanpy import settings + + return_df = settings.preset.highly_variable_genes.return_df if min_disp is None: min_disp = 0.5 if min_mean is None: @@ -143,10 +148,10 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 max_mean=max_mean, n_top_genes=n_top_genes, flavor=flavor, - ) - adata.var["means"] = result["means"] - adata.var["dispersions"] = result["dispersions"] - adata.var["dispersions_norm"] = result["dispersions_norm"] + return_df=True, + ).set_index(adata.var_names) + assert isinstance(result, pd.DataFrame) + adata.var[cols] = result[cols := ["means", "dispersions", "dispersions_norm"]] if subset: adata._inplace_subset_var(result["gene_subset"]) else: @@ -154,21 +159,21 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 return adata if copy else None start = logg.info("extracting highly variable genes") X = data # no copy necessary, X remains unchanged in the following - mean, var = materialize_as_ndarray(mean_var(X, axis=0, correction=1)) + means, vars = materialize_as_ndarray(mean_var(X, axis=0, correction=1)) # now actually compute the dispersion - mean[mean == 0] = 1e-12 # set entries equal to zero to small value - dispersion = var / mean + means[means == 0] = 1e-12 # set entries equal to zero to small value + dispersions = vars / means if log: # logarithmized mean as in Seurat - dispersion[dispersion == 0] = np.nan - dispersion = np.log(dispersion) - mean = np.log1p(mean) + dispersions[dispersions == 0] = np.nan + dispersions = np.log(dispersions) + means = np.log1p(means) # all of the following quantities are "per-gene" here df = pd.DataFrame() - df["mean"] = mean - df["dispersion"] = dispersion + df["means"] = means + df["dispersions"] = dispersions if flavor == "seurat": - df["mean_bin"] = pd.cut(df["mean"], bins=n_bins) - disp_grouped = df.groupby("mean_bin", observed=True)["dispersion"] + df["mean_bin"] = pd.cut(df["means"], bins=n_bins) + disp_grouped = df.groupby("mean_bin", observed=True)["dispersions"] disp_mean_bin = disp_grouped.mean() disp_std_bin = disp_grouped.std(ddof=1) # retrieve those genes that have nan std, these are the ones where @@ -187,70 +192,59 @@ def filter_genes_dispersion( # noqa: PLR0912, PLR0913, PLR0915 disp_std_bin[one_gene_per_bin] = disp_mean_bin[one_gene_per_bin.values].values disp_mean_bin[one_gene_per_bin] = 0 # actually do the normalization - df["dispersion_norm"] = ( + df["dispersions_norm"] = ( # use values here as index differs - df["dispersion"].values - disp_mean_bin[df["mean_bin"].values].values + df["dispersions"].values - disp_mean_bin[df["mean_bin"].values].values ) / disp_std_bin[df["mean_bin"].values].values elif flavor == "cell_ranger": from statsmodels import robust df["mean_bin"] = pd.cut( - df["mean"], - np.r_[-np.inf, np.percentile(df["mean"], np.arange(10, 105, 5)), np.inf], + df["means"], + np.r_[-np.inf, np.percentile(df["means"], np.arange(10, 105, 5)), np.inf], ) - disp_grouped = df.groupby("mean_bin", observed=True)["dispersion"] + disp_grouped = df.groupby("mean_bin", observed=True)["dispersions"] 
disp_median_bin = disp_grouped.median() # the next line raises the warning: "Mean of empty slice" with warnings.catch_warnings(): warnings.simplefilter("ignore") disp_mad_bin = disp_grouped.apply(robust.mad) - df["dispersion_norm"] = ( + df["dispersions_norm"] = ( np.abs( - df["dispersion"].values - disp_median_bin[df["mean_bin"].values].values + df["dispersions"].values - disp_median_bin[df["mean_bin"].values].values ) / disp_mad_bin[df["mean_bin"].values].values ) else: msg = '`flavor` needs to be "seurat" or "cell_ranger"' raise ValueError(msg) - dispersion_norm = df["dispersion_norm"].values.astype("float32") + dispersions_norm = df["dispersions_norm"].values.astype("float32") if n_top_genes is not None: - dispersion_norm = dispersion_norm[~np.isnan(dispersion_norm)] - dispersion_norm[ - ::-1 - ].sort() # interestingly, np.argpartition is slightly slower - disp_cut_off = dispersion_norm[n_top_genes - 1] - gene_subset = df["dispersion_norm"].values >= disp_cut_off + dispersions_norm = dispersions_norm[~np.isnan(dispersions_norm)] + # interestingly, np.argpartition is slightly slower than this: + dispersions_norm[::-1].sort() + disp_cut_off = dispersions_norm[n_top_genes - 1] + gene_subset = df["dispersions_norm"].values >= disp_cut_off logg.debug( f"the {n_top_genes} top genes correspond to a " f"normalized dispersion cutoff of {disp_cut_off}" ) else: max_disp = np.inf if max_disp is None else max_disp - dispersion_norm[np.isnan(dispersion_norm)] = 0 # similar to Seurat + dispersions_norm[np.isnan(dispersions_norm)] = 0 # similar to Seurat gene_subset = np.logical_and.reduce( ( - mean > min_mean, - mean < max_mean, - dispersion_norm > min_disp, - dispersion_norm < max_disp, + means > min_mean, + means < max_mean, + dispersions_norm > min_disp, + dispersions_norm < max_disp, ) ) + df["gene_subset"] = gene_subset + df["dispersions_norm"] = df["dispersions_norm"].astype("float32") logg.info(" finished", time=start) - return np.rec.fromarrays( - ( - gene_subset, - df["mean"].values, - df["dispersion"].values, - df["dispersion_norm"].values.astype("float32", copy=False), - ), - dtype=[ - ("gene_subset", bool), - ("means", "float32"), - ("dispersions", "float32"), - ("dispersions_norm", "float32"), - ], - ) + rv = df[["gene_subset", "means", "dispersions", "dispersions_norm"]] + return rv if return_df else rv.to_records(index=False) def filter_genes_cv_deprecated(X, Ecutoff, cvFilter): diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 0382b5fe9f..b6e7c21a87 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -24,7 +24,7 @@ from numpy.typing import NDArray - from .._types import HVGFlavor + from .._settings.presets import HVGFlavor def _highly_variable_genes_seurat_v3( # noqa: PLR0912, PLR0915 @@ -528,7 +528,7 @@ def highly_variable_genes( # noqa: PLR0913 max_mean: float = 3, span: float = 0.3, n_bins: int = 20, - flavor: HVGFlavor = "seurat", + flavor: HVGFlavor | None = None, subset: bool = False, inplace: bool = True, batch_key: str | None = None, @@ -597,9 +597,10 @@ def highly_variable_genes( # noqa: PLR0913 the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor - Choose the flavor for identifying highly variable genes. For the dispersion - based methods in their default workflows, Seurat passes the cutoffs whereas - Cell Ranger passes `n_top_genes`. 
+ Choose the flavor for identifying highly variable genes + (default depends on :attr:`scanpy.settings.preset` property :attr:`~scanpy.Preset.highly_variable_genes`). + For the dispersion based methods in their default workflows, + `'seurat'` passes the cutoffs whereas `'cell_ranger'` passes `n_top_genes`. subset Inplace subset to highly-variable genes if `True` otherwise merely indicate highly variable genes. @@ -646,6 +647,11 @@ def highly_variable_genes( # noqa: PLR0913 This function replaces :func:`~scanpy.pp.filter_genes_dispersion`. """ + if flavor is None: + from .. import settings + + flavor = settings.preset.highly_variable_genes.flavor + start = logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index 3b34845789..90caa5cf3c 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -73,7 +73,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_highly_variable: bool | None = None, dtype: DTypeLike = "float32", - key_added: str | None = None, + key_added: str | None | Empty = _empty, copy: bool = False, ) -> AnnData | np.ndarray | CSBase | None: r"""Principal component analysis :cite:p:`Pedregosa2011`. @@ -205,6 +205,8 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915 # Current chunking implementation relies on pca being called on X msg = "Cannot use `layer` and `chunked` at the same time." raise NotImplementedError(msg) + if key_added is _empty: + key_added = settings.preset.pca.key_added # chunked calculation is not randomized, anyways if svd_solver in {"auto", "randomized"} and not chunked: diff --git a/src/scanpy/preprocessing/_recipes.py b/src/scanpy/preprocessing/_recipes.py index 420c6904a0..e55296851f 100644 --- a/src/scanpy/preprocessing/_recipes.py +++ b/src/scanpy/preprocessing/_recipes.py @@ -5,13 +5,7 @@ from typing import TYPE_CHECKING from .. import logging as logg -from .. import preprocessing as pp from .._compat import CSBase, old_positionals -from ._deprecated.highly_variable_genes import ( - filter_genes_cv_deprecated, - filter_genes_dispersion, -) -from ._normalization import normalize_total if TYPE_CHECKING: from anndata import AnnData @@ -54,7 +48,9 @@ def recipe_weinreb17( Return a copy if true. """ + from .. import pp from ._deprecated import normalize_per_cell_weinreb16_deprecated, zscore_deprecated + from ._deprecated.highly_variable_genes import filter_genes_cv_deprecated if isinstance(adata.X, CSBase): msg = "`recipe_weinreb16 does not support sparse matrices." @@ -102,21 +98,20 @@ def recipe_seurat( Return a copy if true. """ + from .. 
import pl, pp + from ._deprecated.highly_variable_genes import filter_genes_dispersion + if copy: adata = adata.copy() pp.filter_cells(adata, min_genes=200) pp.filter_genes(adata, min_cells=3) - normalize_total(adata, target_sum=1e4) + pp.normalize_total(adata, target_sum=1e4) filter_result = filter_genes_dispersion( adata.X, min_mean=0.0125, max_mean=3, min_disp=0.5, log=not log ) if plot: - from ..plotting import ( - _preprocessing as ppp, - ) - - ppp.filter_genes_dispersion(filter_result, log=not log) - adata._inplace_subset_var(filter_result.gene_subset) # filter genes + pl.filter_genes_dispersion(filter_result, log=not log) + adata._inplace_subset_var(filter_result["gene_subset"]) # filter genes if log: pp.log1p(adata) pp.scale(adata, max_value=10) @@ -175,24 +170,25 @@ def recipe_zheng17( Returns or updates `adata` depending on `copy`. """ + from .. import pl, pp + from ._deprecated.highly_variable_genes import filter_genes_dispersion + start = logg.info("running recipe zheng17") if copy: adata = adata.copy() # only consider genes with more than 1 count pp.filter_genes(adata, min_counts=1) # normalize with total UMI count per cell - normalize_total(adata, key_added="n_counts_all") + pp.normalize_total(adata, key_added="n_counts_all") filter_result = filter_genes_dispersion( adata.X, flavor="cell_ranger", n_top_genes=n_top_genes, log=False ) if plot: # should not import at the top of the file - from ..plotting import _preprocessing as ppp - - ppp.filter_genes_dispersion(filter_result, log=True) + pl.filter_genes_dispersion(filter_result, log=True) # actually filter the genes, the following is the inplace version of # adata = adata[:, filter_result.gene_subset] adata._inplace_subset_var(filter_result.gene_subset) # filter genes - normalize_total(adata) # renormalize after filtering + pp.normalize_total(adata) # renormalize after filtering if log: pp.log1p(adata) # log transform: X = log(X + 1) pp.scale(adata) diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index a8fb274203..6dc39e5087 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -20,7 +20,7 @@ from .. import logging as logg from .._compat import CSBase, CSRBase, DaskArray, deprecated, njit, old_positionals -from .._settings import settings as sett +from .._settings import settings from .._utils import ( _check_array_function_arguments, _resolve_axis, @@ -143,7 +143,7 @@ def filter_cells( ) if n_given_options != 1: msg = ( - "Only provide one of the optional parameters `min_counts`, " + "Provide exactly one of the optional parameters `min_counts`, " "`min_genes`, `max_counts`, `max_genes` per call." ) raise ValueError(msg) @@ -257,7 +257,7 @@ def filter_genes( ) if n_given_options != 1: msg = ( - "Only provide one of the optional parameters `min_counts`, " + "Provide exactly one of the optional parameters `min_counts`, " "`min_cells`, `max_counts`, `max_cells` per call." 
) raise ValueError(msg) @@ -724,7 +724,7 @@ def regress_out( if isinstance(X, CSBase): logg.info(" sparse input is densified and may lead to high memory use") - n_jobs = sett.n_jobs if n_jobs is None else n_jobs + n_jobs = settings.n_jobs if n_jobs is None else n_jobs # regress on a single categorical variable variable_is_categorical = False @@ -850,7 +850,7 @@ def sample( fraction: float | None = None, *, n: int | None = None, - rng: RNGLike | SeedLike | None = 0, + rng: RNGLike | SeedLike | None = None, copy: Literal[False] = False, replace: bool = False, axis: Literal["obs", 0, "var", 1] = "obs", diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index 5658721609..ca2b958597 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -13,11 +13,11 @@ if TYPE_CHECKING: from collections.abc import Sequence - from typing import Literal from anndata import AnnData from .._compat import CSBase + from .._settings.presets import LeidenFlavor from .._utils.random import _LegacyRandom try: # sphinx-autodoc-typehints + optional dependency @@ -43,7 +43,7 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915 neighbors_key: str | None = None, obsp: str | None = None, copy: bool = False, - flavor: Literal["leidenalg", "igraph"] = "leidenalg", + flavor: LeidenFlavor | None = None, **clustering_args, ) -> AnnData | None: """Cluster cells into subgroups :cite:p:`Traag2019`. @@ -118,28 +118,34 @@ def leiden( # noqa: PLR0912, PLR0913, PLR0915 and `n_iterations`. """ - if flavor not in {"igraph", "leidenalg"}: - msg = ( - f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed" - ) - raise ValueError(msg) - _utils.ensure_igraph() - if flavor == "igraph": - if directed: - msg = "Cannot use igraph’s leiden implementation with a directed graph." - raise ValueError(msg) - if partition_type is not None: - msg = "Do not pass in partition_type argument when using igraph." - raise ValueError(msg) - else: - try: - import leidenalg - - msg = 'In the future, the default backend for leiden will be igraph instead of leidenalg.\n\n To achieve the future defaults please pass: flavor="igraph" and n_iterations=2. directed must also be False to work with igraph\'s implementation.' + if flavor is None: + from scanpy import settings + + flavor = settings.preset.leiden.flavor + _utils.ensure_igraph() # we need igraph regardless of `flavor` + match flavor: + case "igraph": + if directed: + msg = "Cannot use igraph’s leiden implementation with a directed graph." + raise ValueError(msg) + if partition_type is not None: + msg = "Do not pass in partition_type argument when using igraph." + raise ValueError(msg) + case "leidenalg": + try: + import leidenalg + except ImportError as e: + msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." + raise ImportError(msg) from e + msg = ( + "In the future, the default backend for leiden will be igraph instead of leidenalg.\n\n" + 'To achieve the future defaults please pass: `flavor="igraph"` and n_iterations=2. ' + "directed must also be False to work with igraph's implementation." + ) _utils.warn_once(msg, FutureWarning, stacklevel=3) - except ImportError as e: - msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." - raise ImportError(msg) from e + case _: + msg = f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed." 
+ raise ValueError(msg) clustering_args = dict(clustering_args) start = logg.info("running Leiden clustering") diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 2a9669a11f..1054b27bf8 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING import numba import numpy as np @@ -13,7 +13,9 @@ from .. import _utils from .. import logging as logg from .._compat import CSBase, njit, old_positionals +from .._settings.presets import DETest from .._utils import ( + _empty, check_nonnegative_integers, get_literal_vals, raise_not_implemented_error_if_backed_type, @@ -22,15 +24,15 @@ if TYPE_CHECKING: from collections.abc import Generator, Iterable + from typing import Literal from anndata import AnnData from numpy.typing import NDArray - _CorrMethod = Literal["benjamini-hochberg", "bonferroni"] + from .._utils import Empty + _CorrMethod = Literal["benjamini-hochberg", "bonferroni"] -# Used with get_literal_vals -_Method = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] _CONST_MAX_SIZE = 10000000 @@ -420,7 +422,7 @@ def logreg( def compute_statistics( # noqa: PLR0912 self, - method: _Method, + method: DETest, *, corr_method: _CorrMethod = "benjamini-hochberg", n_genes_user: int | None = None, @@ -508,7 +510,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 adata: AnnData, groupby: str, *, - mask_var: NDArray[np.bool_] | str | None = None, + mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_raw: bool | None = None, groups: Literal["all"] | Iterable[str] = "all", reference: str = "rest", @@ -517,7 +519,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 pts: bool = False, key_added: str | None = None, copy: bool = False, - method: _Method | None = None, + method: DETest | None = None, corr_method: _CorrMethod = "benjamini-hochberg", tie_correct: bool = False, layer: str | None = None, @@ -621,6 +623,13 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 >>> sc.pl.rank_genes_groups(adata) """ + from scanpy import settings + + if mask_var is _empty: + mask_var = settings.preset.rank_genes_groups.mask_var + if method is None: + method = settings.preset.rank_genes_groups.method + mask_var = _check_mask(adata, mask_var, "var") if use_raw is None: @@ -629,14 +638,11 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 msg = "Received `use_raw=True`, but `adata.raw` is empty." raise ValueError(msg) - if method is None: - method = "t-test" - if "only_positive" in kwds: rankby_abs = not kwds.pop("only_positive") # backwards compat start = logg.info("ranking genes") - if method not in (avail_methods := get_literal_vals(_Method)): + if method not in (avail_methods := get_literal_vals(DETest)): msg = f"Method must be one of {avail_methods}." raise ValueError(msg) diff --git a/src/scanpy/tools/_umap.py b/src/scanpy/tools/_umap.py index 7803616924..bf95995210 100644 --- a/src/scanpy/tools/_umap.py +++ b/src/scanpy/tools/_umap.py @@ -233,8 +233,7 @@ def umap( # noqa: PLR0913, PLR0915 ) elif method == "rapids": msg = ( - "`method='rapids'` is deprecated. " - "Use `rapids_singlecell.tl.louvain` instead." + "`method='rapids'` is deprecated. Use `rapids_singlecell.tl.umap` instead." ) warnings.warn(msg, FutureWarning, stacklevel=2) metric = neigh_params.get("metric", "euclidean")
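
A few usage sketches follow. They are illustrative only, assume this patch is applied on top of current scanpy, and restate behavior visible in the hunks above rather than documenting a finished API.

The new `scanpy.settings.preset` setting (see the `SettingsMeta.preset` property and the `docs/api/settings.md` hunk) accepts either a `Preset` member or its string value:

```python
import scanpy as sc

# The shipped default keeps scanpy 1.x behavior.
assert sc.settings.preset is sc.Preset.ScanpyV1

# The setter coerces strings via Preset(...); Preset is a StrEnum whose
# auto() values are the lowercased member names, so both spellings work.
sc.settings.preset = sc.Preset.SeuratV5
sc.settings.preset = "seuratv5"  # equivalent
```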
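Each `Preset` member exposes one `NamedTuple` of per-function defaults through `preset_property`; this is what the preset-aware functions read internally, and it can also be inspected directly. A sketch (the commented reprs are taken from the mappings in `presets.py`):

```python
from scanpy import Preset

seurat = Preset.SeuratV5
print(seurat.highly_variable_genes)  # HVGPreset(flavor='seurat_v3_paper', return_df=True)
print(seurat.pca)                    # PcaPreset(key_added='pca')
print(seurat.rank_genes_groups)      # RankGenesGroupsPreset(method='wilcoxon', mask_var='highly_variable')
print(seurat.leiden)                 # LeidenPreset(flavor='leidenalg')
```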
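`Preset.override` temporarily switches the global setting and restores the member it was called on, so the usual pattern is to call it on the current value; this just expands the doctest in `presets.py`:

```python
import scanpy as sc

sc.settings.preset = sc.Preset.ScanpyV1
with sc.settings.preset.override(sc.Preset.SeuratV5):
    # inside the block the global setting is switched ...
    assert sc.settings.preset is sc.Preset.SeuratV5
# ... and restored on exit, even if the block raised
assert sc.settings.preset is sc.Preset.ScanpyV1
```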
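On the preprocessing side, `pp.highly_variable_genes` and `pp.pca` only consult the preset when the corresponding argument is left unset (`None` / `_empty`), so explicit arguments keep working unchanged. A minimal sketch on toy counts (the `seurat_v3_paper` flavor used by the `ScanpyV2Preview`/`SeuratV5` presets additionally needs the `scikit-misc` package):

```python
import numpy as np
import scanpy as sc
from anndata import AnnData

# Toy raw-count matrix; any counts AnnData would do.
rng = np.random.default_rng(0)
adata = AnnData(rng.poisson(1.0, size=(200, 500)).astype(np.float32))

sc.settings.preset = "scanpyv2preview"

# flavor falls back to the preset's choice ("seurat_v3_paper" here);
# passing flavor= explicitly would bypass the preset entirely.
sc.pp.highly_variable_genes(adata, n_top_genes=50)

# pp.pca resolves key_added the same way (None under ScanpyV1, "pca" otherwise);
# passing key_added explicitly still takes precedence.
sc.pp.pca(adata, n_comps=20)
```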
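Clustering and differential expression resolve their defaults the same way: `tl.leiden` takes its `flavor` from the preset, and `tl.rank_genes_groups` fills in `method` and `mask_var`. A sketch on the small bundled dataset (under `SeuratV5`, `rank_genes_groups` would additionally require an `adata.var["highly_variable"]` column, because that preset sets `mask_var='highly_variable'`):

```python
import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()  # small processed dataset shipped with scanpy
sc.pp.neighbors(adata)  # recompute the graph explicitly to keep the sketch self-contained

sc.settings.preset = sc.Preset.ScanpyV2Preview

# flavor comes from the preset ("igraph" here, "leidenalg" for ScanpyV1/SeuratV5);
# n_iterations=2 matches the future default advertised in the FutureWarning above.
sc.tl.leiden(adata, n_iterations=2)

# method="wilcoxon" and mask_var=None are filled in from the ScanpyV2Preview preset.
sc.tl.rank_genes_groups(adata, groupby="leiden")
```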
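Finally, the `preset_property`/`postprocess_preset_prop` pair is documentation plumbing: at import time each property's docstring gets one entry per preset listing its non-default parameters (with no `NamedTuple` defaults declared, that is currently every field), and the return annotation is rewritten from the `Mapping[Preset, ...]` signature. A rough way to see the generated text:

```python
from scanpy._settings.presets import Preset

# Accessing the cached_property on the class returns the descriptor itself,
# whose __doc__ was amended by the module-level postprocessors.
print(Preset.leiden.__doc__)
```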