diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 000000000..29888e24e
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,63 @@
+name: Benchmark
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+env:
+  FORCE_COLOR: "1"
+
+defaults:
+  run:
+    shell: bash -e {0} # -e to fail on error
+
+jobs:
+  benchmark:
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ["3.12"]
+        os: [ubuntu-latest]
+
+    env:
+      OS: ${{ matrix.os }}
+      PYTHON: ${{ matrix.python }}
+      ASV_DIR: "${{ github.workspace }}/benchmarks"
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Fetch main branch for `asv run`’s hash
+        run: git fetch origin main:main
+        if: ${{ github.ref_name != 'main' }}
+
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+          cache: "pip"
+
+      - name: Cache datasets
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache
+          key: benchmark-state-${{ hashFiles('benchmarks/**') }}
+
+      - name: Install dependencies
+        # https://github.com/airspeed-velocity/asv/issues/1493
+        run: pip install 'asv@git+https://github.com/airspeed-velocity/asv.git' virtualenv
+
+      - name: Configure ASV
+        working-directory: ${{ env.ASV_DIR }}
+        run: asv machine --yes
+
+      - name: Quick benchmark run
+        working-directory: ${{ env.ASV_DIR }}
+        run: asv run --dry-run --quick --show-stderr --verbose HEAD^!
diff --git a/.mypy.ini b/.mypy.ini
index 7fb81b6dc..7c6728c8d 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -1,7 +1,6 @@
 [mypy]
 mypy_path = squidpy
 python_version = 3.10
-plugins = numpy.typing.mypy_plugin
 ignore_errors = False
 warn_redundant_casts = True
 
diff --git a/.prettierrc.yaml b/.prettierrc.yaml
new file mode 100644
index 000000000..a7159cf26
--- /dev/null
+++ b/.prettierrc.yaml
@@ -0,0 +1,9 @@
+overrides:
+  # JSON with comments and trailing commas
+  - files:
+      - ".vscode/*.json"
+      - "benchmarks/*.json"
+    options:
+      parser: json5
+      quoteProps: preserve
+      singleQuote: false
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 000000000..8ef1368f1
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,19 @@
+# Squidpy Benchmarks
+
+This directory contains code for benchmarking Squidpy using [asv][].
+
+The functionality is checked using the [`benchmark.yml`][] workflow.
+Benchmarks are run using the [benchmark bot][].
+
+[asv]: https://asv.readthedocs.io/
+[`benchmark.yml`]: ../.github/workflows/benchmark.yml
+[benchmark bot]: https://github.com/apps/scverse-benchmark
+
+## Data processing in benchmarks
+
+Each dataset is processed so that it has
+
+- log-transformed data in `.X` (C/row-major format) and in `.layers['off-axis']` (FORTRAN/column-major format)
+
+The benchmarks are set up so that the `layer` parameter names the layer that is moved into `.X` before each benchmark runs.
+That way, we don’t need to pass `layer=layer` everywhere.
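To make the layout described in the README concrete, here is a minimal sketch (not part of the diff) of the `.X` / `.layers["off-axis"]` arrangement and of the layer swap the benchmarks perform; the toy array and its shape are illustrative, and only `numpy` and `anndata` are assumed:

```python
import numpy as np
from anndata import AnnData

# Toy stand-in for a real dataset; NumPy arrays are C/row-major by default.
x = np.log1p(np.random.default_rng(0).random((100, 50)))
adata = AnnData(X=x)
adata.layers["off-axis"] = x.copy(order="F")  # FORTRAN/column-major copy

assert not np.isfortran(adata.X)
assert np.isfortran(adata.layers["off-axis"])

# What the `layer` benchmark parameter does: move the chosen layer into `.X`
# so that benchmark bodies never need to pass `layer=layer` explicitly.
layer = "off-axis"
if layer is not None:
    adata.X = adata.layers.pop(layer)
```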
+ "version": 1, + + // The name of the project being benchmarked + "project": "squidpy", + + // The project's homepage + "project_url": "https://squidpy.readthedocs.io/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "..", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // Customizable commands for building, installing, and + // uninstalling the project. See asv.conf.json documentation. + // + "build_command": [ + "python -m pip install uv build", + "python -m build --wheel -o {build_cache_dir} {build_dir}", + ], + "install_command": ["python -m uv pip install {wheel_file}"], + // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], // for git + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "virtualenv", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/scverse/squidpy/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["3.11", "3.13"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + "conda_channels": ["conda-forge", "defaults"], + + "matrix": { + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + "req": {}, + // same for env variables + "env": {}, + }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. 
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 000000000..72ad72a76
--- /dev/null
+++ b/benchmarks/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""ASV benchmark suite for squidpy."""
diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py
new file mode 100644
index 000000000..e8edc37da
--- /dev/null
+++ b/benchmarks/benchmarks/_utils.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import itertools
+from functools import cache
+from typing import TYPE_CHECKING
+
+import numpy as np
+from asv_runner.benchmarks.mark import skip_for_params
+from scipy.sparse import csc_matrix, csr_matrix
+
+import squidpy as sq
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+    from collections.abc import Set as AbstractSet
+    from typing import Literal, Protocol, TypeVar
+
+    from anndata import AnnData
+
+    C = TypeVar("C", bound=Callable)  # type: ignore[type-arg]
+
+    class ParamSkipper(Protocol):
+        def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ...  # type: ignore[type-arg]
+
+    Dataset = Literal["imc"]
+    KeyX = Literal[None, "off-axis"]
+
+
+@cache
+def _imc() -> AnnData:
+    adata = sq.datasets.imc()  # type: ignore[attr-defined]
+    assert isinstance(adata.X, np.ndarray)
+    assert not np.isfortran(adata.X)
+
+    return adata
+
+
+def imc() -> AnnData:
+    return _imc().copy()
+
+
+def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix:
+    if isinstance(x, csr_matrix):
+        return x.tocsc()
+    if isinstance(x, np.ndarray):
+        assert not np.isfortran(x)
+        return x.copy(order="F")
+    msg = f"Unexpected type {type(x)}"
+    raise TypeError(msg)
+
+
+def _get_dataset_raw(dataset: Dataset) -> tuple[AnnData, str | None]:
+    match dataset:
+        case "imc":
+            adata, cluster_key = imc(), "cell type"
+        case _:
+            msg = f"Unknown dataset {dataset}"
+            raise AssertionError(msg)
+
+    adata.layers["off-axis"] = to_off_axis(adata.X)
+
+    return adata, cluster_key
+
+
+def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | None]:
+    adata, batch_key = _get_dataset_raw(dataset)
+    if layer is not None:
+        adata.X = adata.layers.pop(layer)
+    return adata, batch_key
+
+
+def param_skipper(param_names: Sequence[str], params: tuple[Sequence[object], ...]) -> ParamSkipper:
+    """Create a decorator that will skip all combinations that contain any of the given parameters.
+
+    Examples
+    --------
+    >>> param_names = ["letters", "numbers"]
+    >>> params = [["a", "b"], [3, 4, 5]]
+    >>> skip_when = param_skipper(param_names, params)
+
+    >>> @skip_when(letters={"a"}, numbers={3})
+    ... def func(a, b):
+    ...     print(a, b)
+    >>> run_as_asv_benchmark(func)
+    b 4
+    b 5
+
+    """
+
+    def skip(**skipped: AbstractSet) -> Callable[[C], C]:  # type: ignore[type-arg]
+        skipped_combs = [
+            tuple(record.values())
+            for record in (dict(zip(param_names, vals, strict=True)) for vals in itertools.product(*params))
+            if any(v in skipped.get(n, set()) for n, v in record.items())
+        ]
+        # print(skipped_combs, file=sys.stderr)
+        return skip_for_params(skipped_combs)  # type: ignore[no-any-return]
+
+    return skip
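The combination filtering that `param_skipper` hands to `skip_for_params` can be shown standalone (no ASV needed); this sketch inlines the same comprehension with the suite's actual parameters, using a hypothetical `skipped_combinations` helper:

```python
import itertools

param_names = ["dataset", "layer"]
params = (["imc"], [None, "off-axis"])


def skipped_combinations(skipped: dict[str, set]) -> list[tuple]:
    """Return the parameter tuples that hit any of the skipped values."""
    return [
        tuple(record.values())
        for record in (
            dict(zip(param_names, vals, strict=True)) for vals in itertools.product(*params)
        )
        if any(v in skipped.get(n, set()) for n, v in record.items())
    ]


# Skip every run on the F-ordered layer:
print(skipped_combinations({"layer": {"off-axis"}}))  # [('imc', 'off-axis')]
```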
diff --git a/benchmarks/benchmarks/preprocessing_co_occurence.py b/benchmarks/benchmarks/preprocessing_co_occurence.py
new file mode 100644
index 000000000..fe0109197
--- /dev/null
+++ b/benchmarks/benchmarks/preprocessing_co_occurence.py
@@ -0,0 +1,50 @@
+"""Benchmark co-occurrence operations in Squidpy.
+
+API documentation: .
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from ._utils import get_dataset, param_skipper
+
+if TYPE_CHECKING:
+    from anndata import AnnData
+
+    from ._utils import Dataset, KeyX
+
+from squidpy.gr import co_occurrence  # type: ignore[attr-defined]
+
+# setup variables
+
+
+adata: AnnData
+cluster_key: str | None
+
+
+def setup(dataset: Dataset, layer: KeyX, *_) -> None:  # type: ignore[no-untyped-def]
+    """Set up global variables before each benchmark."""
+    global adata, cluster_key
+    adata, cluster_key = get_dataset(dataset, layer=layer)
+
+
+# ASV suite
+
+params: tuple[list[Dataset], list[KeyX]] = (
+    [
+        "imc",
+    ],
+    [None, "off-axis"],
+)
+param_names = ["dataset", "layer"]
+
+skip_when = param_skipper(param_names, params)
+
+
+def time_co_occurrence(*_) -> None:  # type: ignore[no-untyped-def]
+    co_occurrence(adata, cluster_key=cluster_key)
+
+
+def peakmem_co_occurrence(*_) -> None:  # type: ignore[no-untyped-def]
+    co_occurrence(adata, cluster_key=cluster_key)
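For reference, ASV calls `setup(*combo)` before each measured function with the same parameter combination, which is why the module above can stash `adata` and `cluster_key` in module globals. A hand-driven sketch of one sweep over the parameter matrix (the import path is assumed, relative to the `benchmarks/` directory):

```python
import itertools

import benchmarks.preprocessing_co_occurence as bench  # path assumed

for combo in itertools.product(*bench.params):
    bench.setup(*combo)  # loads "imc" and, for "off-axis", swaps that layer into .X
    bench.time_co_occurrence(*combo)  # the call ASV times
    bench.peakmem_co_occurrence(*combo)  # the call ASV measures peak memory for
```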