From 274600d332c5fb372864e07aa4f6d112d56fc808 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Thu, 10 Jul 2025 16:56:02 +0200 Subject: [PATCH 01/11] init --- .github/workflows/benchmark.yml | 62 +++++++ benchmarks/README.md | 19 +++ benchmarks/asv.conf.json | 159 ++++++++++++++++++ benchmarks/benchmarks/__init__.py | 1 + benchmarks/benchmarks/_utils.py | 109 ++++++++++++ .../benchmarks/preprocessing_co_occurence.py | 50 ++++++ 6 files changed, 400 insertions(+) create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmarks/README.md create mode 100644 benchmarks/asv.conf.json create mode 100644 benchmarks/benchmarks/__init__.py create mode 100644 benchmarks/benchmarks/_utils.py create mode 100644 benchmarks/benchmarks/preprocessing_co_occurence.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 000000000..1e952440b --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,62 @@ +name: Benchmark + +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + FORCE_COLOR: "1" + +defaults: + run: + shell: bash -e {0} # -e to fail on error + +jobs: + benchmark: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python: ["3.13"] + os: [ubuntu-latest] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python }} + ASV_DIR: "./benchmarks" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch main branch for `asv run`’s hash + run: git fetch origin main:main + if: ${{ github.ref_name != 'main' }} + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: 'pip' + + - name: Cache datasets + uses: actions/cache@v4 + with: + path: | + ~/.cache + key: benchmark-state-${{ hashFiles('benchmarks/**') }} + + - name: Install dependencies + run: pip install 'asv>=0.6.4' + + - name: Configure ASV + working-directory: ${{ env.ASV_DIR }} + run: asv machine --yes + + - name: Quick benchmark run + working-directory: ${{ env.ASV_DIR }} + run: asv run --dry-run --quick --show-stderr --verbose HEAD^! diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..8ef1368f1 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,19 @@ +# Squidpy Benchmarks + +This directory contains code for benchmarking Squidpy using [asv][]. + +The functionality is checked using the [`benchmark.yml`][] workflow. +Benchmarks are run using the [benchmark bot][]. + +[asv]: https://asv.readthedocs.io/ +[`benchmark.yml`]: ../.github/workflows/benchmark.yml +[benchmark bot]: https://github.com/apps/scverse-benchmark + +## Data processing in benchmarks + +Each dataset is processed so it has + +- `.X` (containing data in C/row-major format) and `.layers['off-axis']` (containing data in FORTRAN/column-major format) with log-transformed data + +The benchmarks are set up so the `layer` parameter indicates the layer that will be moved into `.X` before the benchmark. +That way, we don’t need to add `layer=layer` everywhere. diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 000000000..fe24b6748 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,159 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "squidpy", + + // The project's homepage + "project_url": "https://squidpy.readthedocs.io/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "..", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // Customizable commands for building, installing, and + // uninstalling the project. See asv.conf.json documentation. + // + // "install_command": ["python -mpip install {wheel_file}"], + // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + "build_command": [ + "python -m pip install build", + "python -m build --wheel -o {build_cache_dir} {build_dir}", + ], + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], // for git + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/squidpy/scanpy/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["3.11", "3.13"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + "conda_channels": ["conda-forge", "defaults"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + "matrix": { + "numpy": [""], + "scipy": [""], + "squidpy": [""] + }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "six": null}, // don't run without six on conda + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "numpy": "1.8"}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": ".asv/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache results of the recent builds in each + // environment, making them faster to install next time. This is + // the number of builds to keep, per environment. + // "build_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // }, + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 000000000..72ad72a76 --- /dev/null +++ b/benchmarks/benchmarks/__init__.py @@ -0,0 +1 @@ +"""ASV benchmark suite for squidpy.""" diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py new file mode 100644 index 000000000..fe2f1cf08 --- /dev/null +++ b/benchmarks/benchmarks/_utils.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import itertools +from functools import cache +from typing import TYPE_CHECKING + +import numpy as np +from asv_runner.benchmarks.mark import skip_for_params +from scanpy._compat import CSCBase, CSRBase + +import squidpy as sq + +if TYPE_CHECKING: + from collections.abc import Callable, Sequence + from collections.abc import Set as AbstractSet + from typing import Literal, Protocol, TypeVar + + from anndata import AnnData + + C = TypeVar("C", bound=Callable) + + class ParamSkipper(Protocol): + def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... + + Dataset = Literal["imc"] + KeyX = Literal[None, "off-axis"] + + +@cache +def _imc() -> AnnData: + adata = sq.datasets.imc() + assert isinstance(adata.X, np.ndarray) + assert not np.isfortran(adata.X) + + return adata + + +def imc() -> AnnData: + return _imc().copy() + + +def to_off_axis(x: np.ndarray | CSRBase) -> np.ndarray | CSCBase: + if isinstance(x, CSRBase): + return x.tocsc() + if isinstance(x, np.ndarray): + assert not np.isfortran(x) + return x.copy(order="F") + msg = f"Unexpected type {type(x)}" + raise TypeError(msg) + + +def _get_dataset_raw(dataset: Dataset) -> tuple[AnnData, str | None]: + match dataset: + case "imc": + adata, cluster_key = imc(), "cell type" + case _: + msg = f"Unknown dataset {dataset}" + raise AssertionError(msg) + + adata.layers["off-axis"] = to_off_axis(adata.X) + + return adata, cluster_key + + +def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | None]: + adata, batch_key = _get_dataset_raw(dataset) + if layer is not None: + adata.X = adata.layers.pop(layer) + return adata, batch_key + + +def get_count_dataset(dataset: Dataset, *, layer: KeyCount = "counts") -> tuple[AnnData, str | None]: + adata, batch_key = _get_dataset_raw(dataset) + + adata.X = adata.layers.pop(layer) + # remove indicators that X was transformed + adata.uns.pop("log1p", None) + + return adata, batch_key + + +def param_skipper(param_names: Sequence[str], params: tuple[Sequence[object], ...]) -> ParamSkipper: + """Create a decorator that will skip all combinations that contain any of the given parameters. + + Examples + -------- + >>> param_names = ["letters", "numbers"] + >>> params = [["a", "b"], [3, 4, 5]] + >>> skip_when = param_skipper(param_names, params) + + >>> @skip_when(letters={"a"}, numbers={3}) + ... def func(a, b): + ... print(a, b) + >>> run_as_asv_benchmark(func) + b 4 + b 5 + + """ + + def skip(**skipped: AbstractSet) -> Callable[[C], C]: + skipped_combs = [ + tuple(record.values()) + for record in (dict(zip(param_names, vals, strict=True)) for vals in itertools.product(*params)) + if any(v in skipped.get(n, set()) for n, v in record.items()) + ] + # print(skipped_combs, file=sys.stderr) + return skip_for_params(skipped_combs) + + return skip diff --git a/benchmarks/benchmarks/preprocessing_co_occurence.py b/benchmarks/benchmarks/preprocessing_co_occurence.py new file mode 100644 index 000000000..67b8044d3 --- /dev/null +++ b/benchmarks/benchmarks/preprocessing_co_occurence.py @@ -0,0 +1,50 @@ +"""Benchmark co-occurrence operations in Squidpy. + +API documentation: . +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import squidpy as sq + +from ._utils import get_dataset, param_skipper + +if TYPE_CHECKING: + from anndata import AnnData + + from ._utils import Dataset, KeyX + +# setup variables + + +adata: AnnData +cluster_key: str | None + + +def setup(dataset: Dataset, layer: KeyX, *_): + """Set up global variables before each benchmark.""" + global adata, cluster_key + adata, cluster_key = get_dataset(dataset, layer=layer) + + +# ASV suite + +params: tuple[list[Dataset], list[KeyX]] = ( + [ + "imc", + ], + [None, "off-axis"], +) +param_names = ["dataset", "layer"] + +skip_when = param_skipper(param_names, params) + + +def time_co_occurrence(*_): + sq.gr.co_occurrence(adata, cluster_key=cluster_key) + + +def peakmem_co_occurrence(*_): + sq.gr.co_occurrence(adata, cluster_key=cluster_key) From a720e1d65811754e7dff2044ee2f36bda224c8e5 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Thu, 10 Jul 2025 17:07:51 +0200 Subject: [PATCH 02/11] update the dependencies --- benchmarks/asv.conf.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index fe24b6748..22e65e2f4 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -73,7 +73,8 @@ "matrix": { "numpy": [""], "scipy": [""], - "squidpy": [""] + "squidpy": [""], + "scanpy": [""], }, // Combinations of libraries/python versions can be excluded/included From 9b447ea656128506da14d99931bdb6815d01e954 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 14 Jul 2025 13:25:51 +0200 Subject: [PATCH 03/11] fix dependencies --- benchmarks/asv.conf.json | 3 +-- benchmarks/benchmarks/_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index 22e65e2f4..953edffff 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -50,7 +50,7 @@ //"install_timeout": 600, // the base URL to show a commit for the project. - "show_commit_url": "https://github.com/squidpy/scanpy/commit/", + "show_commit_url": "https://github.com/scverse/squidpy/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. @@ -74,7 +74,6 @@ "numpy": [""], "scipy": [""], "squidpy": [""], - "scanpy": [""], }, // Combinations of libraries/python versions can be excluded/included diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py index fe2f1cf08..e1eaa6d7b 100644 --- a/benchmarks/benchmarks/_utils.py +++ b/benchmarks/benchmarks/_utils.py @@ -6,7 +6,7 @@ import numpy as np from asv_runner.benchmarks.mark import skip_for_params -from scanpy._compat import CSCBase, CSRBase +from scipy.sparse import csc_matrix, csr_matrix import squidpy as sq @@ -39,8 +39,8 @@ def imc() -> AnnData: return _imc().copy() -def to_off_axis(x: np.ndarray | CSRBase) -> np.ndarray | CSCBase: - if isinstance(x, CSRBase): +def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix: + if isinstance(x, csr_matrix): return x.tocsc() if isinstance(x, np.ndarray): assert not np.isfortran(x) From 703647e77baed7b16f2c0495cafd46095b06eec5 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 14 Jul 2025 13:35:35 +0200 Subject: [PATCH 04/11] fixes to the utils file --- .github/workflows/benchmark.yml | 106 ++++++++++++++++---------------- benchmarks/asv.conf.json | 6 +- benchmarks/benchmarks/_utils.py | 10 --- 3 files changed, 56 insertions(+), 66 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 1e952440b..7cf0d85cb 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,62 +1,62 @@ name: Benchmark on: - push: - branches: [main] - pull_request: - branches: [main] + push: + branches: [main] + pull_request: + branches: [main] env: - FORCE_COLOR: "1" + FORCE_COLOR: "1" defaults: - run: - shell: bash -e {0} # -e to fail on error + run: + shell: bash -e {0} # -e to fail on error jobs: - benchmark: - runs-on: ${{ matrix.os }} - - strategy: - fail-fast: false - matrix: - python: ["3.13"] - os: [ubuntu-latest] - - env: - OS: ${{ matrix.os }} - PYTHON: ${{ matrix.python }} - ASV_DIR: "./benchmarks" - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Fetch main branch for `asv run`’s hash - run: git fetch origin main:main - if: ${{ github.ref_name != 'main' }} - - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - cache: 'pip' - - - name: Cache datasets - uses: actions/cache@v4 - with: - path: | - ~/.cache - key: benchmark-state-${{ hashFiles('benchmarks/**') }} - - - name: Install dependencies - run: pip install 'asv>=0.6.4' - - - name: Configure ASV - working-directory: ${{ env.ASV_DIR }} - run: asv machine --yes - - - name: Quick benchmark run - working-directory: ${{ env.ASV_DIR }} - run: asv run --dry-run --quick --show-stderr --verbose HEAD^! + benchmark: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python: ["3.13"] + os: [ubuntu-latest] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python }} + ASV_DIR: "./benchmarks" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch main branch for `asv run`’s hash + run: git fetch origin main:main + if: ${{ github.ref_name != 'main' }} + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: "pip" + + - name: Cache datasets + uses: actions/cache@v4 + with: + path: | + ~/.cache + key: benchmark-state-${{ hashFiles('benchmarks/**') }} + + - name: Install dependencies + run: pip install 'asv>=0.6.4' + + - name: Configure ASV + working-directory: ${{ env.ASV_DIR }} + run: asv machine --yes + + - name: Quick benchmark run + working-directory: ${{ env.ASV_DIR }} + run: asv run --dry-run --quick --show-stderr --verbose HEAD^! diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index 953edffff..1e0a49acc 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -25,7 +25,7 @@ // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], "build_command": [ "python -m pip install build", - "python -m build --wheel -o {build_cache_dir} {build_dir}", + "python -m build --wheel -o {build_cache_dir} {build_dir}" ], // List of branches to benchmark. If not provided, defaults to "master" @@ -73,7 +73,7 @@ "matrix": { "numpy": [""], "scipy": [""], - "squidpy": [""], + "squidpy": [""] }, // Combinations of libraries/python versions can be excluded/included @@ -123,7 +123,7 @@ // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". - "html_dir": ".asv/html", + "html_dir": ".asv/html" // The number of characters to retain in the commit hashes. // "hash_length": 8, diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py index e1eaa6d7b..f95928865 100644 --- a/benchmarks/benchmarks/_utils.py +++ b/benchmarks/benchmarks/_utils.py @@ -69,16 +69,6 @@ def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | return adata, batch_key -def get_count_dataset(dataset: Dataset, *, layer: KeyCount = "counts") -> tuple[AnnData, str | None]: - adata, batch_key = _get_dataset_raw(dataset) - - adata.X = adata.layers.pop(layer) - # remove indicators that X was transformed - adata.uns.pop("log1p", None) - - return adata, batch_key - - def param_skipper(param_names: Sequence[str], params: tuple[Sequence[object], ...]) -> ParamSkipper: """Create a decorator that will skip all combinations that contain any of the given parameters. From f61c94f03133be1d288e893e289dbffb90816f40 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 14 Jul 2025 13:37:02 +0200 Subject: [PATCH 05/11] maybe try virtenv --- benchmarks/asv.conf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index 1e0a49acc..b8ab174a6 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -43,7 +43,7 @@ // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. - "environment_type": "conda", + "environment_type": "virtualenv", // timeout in seconds for installing any dependencies in environment // defaults to 10 min From 13b0440677e95c85d0ab1409cea920baf8a96d0a Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 14 Jul 2025 14:41:24 +0200 Subject: [PATCH 06/11] remove squidpy itself from the dependencies. and reduce the ypthon version --- .github/workflows/benchmark.yml | 2 +- benchmarks/asv.conf.json | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 7cf0d85cb..c8b988b79 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.13"] + python: ["3.12"] os: [ubuntu-latest] env: diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index b8ab174a6..df74e6ea5 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -72,8 +72,7 @@ // "matrix": { "numpy": [""], - "scipy": [""], - "squidpy": [""] + "scipy": [""] }, // Combinations of libraries/python versions can be excluded/included From 0df4dba18e8c345933e5f043a01600fffee714f2 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 14 Jul 2025 14:53:36 +0200 Subject: [PATCH 07/11] linting --- benchmarks/benchmarks/_utils.py | 12 ++++++------ .../benchmarks/preprocessing_co_occurence.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py index f95928865..2ee11a976 100644 --- a/benchmarks/benchmarks/_utils.py +++ b/benchmarks/benchmarks/_utils.py @@ -17,10 +17,10 @@ from anndata import AnnData - C = TypeVar("C", bound=Callable) + C = TypeVar("C", bound=Callable) # type: ignore[type-arg] class ParamSkipper(Protocol): - def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... + def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... # type: ignore[type-arg] Dataset = Literal["imc"] KeyX = Literal[None, "off-axis"] @@ -28,7 +28,7 @@ def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ... @cache def _imc() -> AnnData: - adata = sq.datasets.imc() + adata = sq.datasets.imc() # type: ignore[attr-defined] assert isinstance(adata.X, np.ndarray) assert not np.isfortran(adata.X) @@ -39,7 +39,7 @@ def imc() -> AnnData: return _imc().copy() -def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix: +def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix: # type: ignore[type-arg] if isinstance(x, csr_matrix): return x.tocsc() if isinstance(x, np.ndarray): @@ -87,13 +87,13 @@ def param_skipper(param_names: Sequence[str], params: tuple[Sequence[object], .. """ - def skip(**skipped: AbstractSet) -> Callable[[C], C]: + def skip(**skipped: AbstractSet) -> Callable[[C], C]: # type: ignore[type-arg] skipped_combs = [ tuple(record.values()) for record in (dict(zip(param_names, vals, strict=True)) for vals in itertools.product(*params)) if any(v in skipped.get(n, set()) for n, v in record.items()) ] # print(skipped_combs, file=sys.stderr) - return skip_for_params(skipped_combs) + return skip_for_params(skipped_combs) # type: ignore[no-any-return] return skip diff --git a/benchmarks/benchmarks/preprocessing_co_occurence.py b/benchmarks/benchmarks/preprocessing_co_occurence.py index 67b8044d3..fe0109197 100644 --- a/benchmarks/benchmarks/preprocessing_co_occurence.py +++ b/benchmarks/benchmarks/preprocessing_co_occurence.py @@ -7,8 +7,6 @@ from typing import TYPE_CHECKING -import squidpy as sq - from ._utils import get_dataset, param_skipper if TYPE_CHECKING: @@ -16,6 +14,8 @@ from ._utils import Dataset, KeyX +from squidpy.gr import co_occurrence # type: ignore[attr-defined] + # setup variables @@ -23,7 +23,7 @@ cluster_key: str | None -def setup(dataset: Dataset, layer: KeyX, *_): +def setup(dataset: Dataset, layer: KeyX, *_) -> None: # type: ignore[no-untyped-def] """Set up global variables before each benchmark.""" global adata, cluster_key adata, cluster_key = get_dataset(dataset, layer=layer) @@ -42,9 +42,9 @@ def setup(dataset: Dataset, layer: KeyX, *_): skip_when = param_skipper(param_names, params) -def time_co_occurrence(*_): - sq.gr.co_occurrence(adata, cluster_key=cluster_key) +def time_co_occurrence(*_) -> None: # type: ignore[no-untyped-def] + co_occurrence(adata, cluster_key=cluster_key) -def peakmem_co_occurrence(*_): - sq.gr.co_occurrence(adata, cluster_key=cluster_key) +def peakmem_co_occurrence(*_) -> None: # type: ignore[no-untyped-def] + co_occurrence(adata, cluster_key=cluster_key) From 46db32a0dcc89a065293855c3b4002ccae0eb48c Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 14 Aug 2025 14:44:22 +0200 Subject: [PATCH 08/11] fix mypy --- .mypy.ini | 1 - benchmarks/benchmarks/_utils.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.mypy.ini b/.mypy.ini index 7fb81b6dc..7c6728c8d 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -1,7 +1,6 @@ [mypy] mypy_path = squidpy python_version = 3.10 -plugins = numpy.typing.mypy_plugin ignore_errors = False warn_redundant_casts = True diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py index 2ee11a976..e8edc37da 100644 --- a/benchmarks/benchmarks/_utils.py +++ b/benchmarks/benchmarks/_utils.py @@ -39,7 +39,7 @@ def imc() -> AnnData: return _imc().copy() -def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix: # type: ignore[type-arg] +def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix: if isinstance(x, csr_matrix): return x.tocsc() if isinstance(x, np.ndarray): From 9c1b4723545d40f5a35f41940919bec07597fcaf Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 14 Aug 2025 14:44:37 +0200 Subject: [PATCH 09/11] use rattler --- .github/workflows/benchmark.yml | 107 ++++++++++++++++---------------- .prettierrc.yaml | 9 +++ benchmarks/asv.conf.json | 30 ++++----- 3 files changed, 78 insertions(+), 68 deletions(-) create mode 100644 .prettierrc.yaml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index c8b988b79..42f1a15b7 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,62 +1,63 @@ name: Benchmark on: - push: - branches: [main] - pull_request: - branches: [main] + push: + branches: [main] + pull_request: + branches: [main] env: - FORCE_COLOR: "1" + FORCE_COLOR: "1" defaults: - run: - shell: bash -e {0} # -e to fail on error + run: + shell: bash -e {0} # -e to fail on error jobs: - benchmark: - runs-on: ${{ matrix.os }} - - strategy: - fail-fast: false - matrix: - python: ["3.12"] - os: [ubuntu-latest] - - env: - OS: ${{ matrix.os }} - PYTHON: ${{ matrix.python }} - ASV_DIR: "./benchmarks" - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Fetch main branch for `asv run`’s hash - run: git fetch origin main:main - if: ${{ github.ref_name != 'main' }} - - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - cache: "pip" - - - name: Cache datasets - uses: actions/cache@v4 - with: - path: | - ~/.cache - key: benchmark-state-${{ hashFiles('benchmarks/**') }} - - - name: Install dependencies - run: pip install 'asv>=0.6.4' - - - name: Configure ASV - working-directory: ${{ env.ASV_DIR }} - run: asv machine --yes - - - name: Quick benchmark run - working-directory: ${{ env.ASV_DIR }} - run: asv run --dry-run --quick --show-stderr --verbose HEAD^! + benchmark: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python: ["3.12"] + os: [ubuntu-latest] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python }} + ASV_DIR: "${{ github.workspace }}/benchmarks" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch main branch for `asv run`’s hash + run: git fetch origin main:main + if: ${{ github.ref_name != 'main' }} + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: "pip" + + - name: Cache datasets + uses: actions/cache@v4 + with: + path: | + ~/.cache + key: benchmark-state-${{ hashFiles('benchmarks/**') }} + + - name: Install dependencies + # https://github.com/airspeed-velocity/asv/issues/1493 + run: pip install 'asv@git+https://github.com/airspeed-velocity/asv.git' py-rattler + + - name: Configure ASV + working-directory: ${{ env.ASV_DIR }} + run: asv machine --yes + + - name: Quick benchmark run + working-directory: ${{ env.ASV_DIR }} + run: asv run --dry-run --quick --show-stderr --verbose HEAD^! diff --git a/.prettierrc.yaml b/.prettierrc.yaml new file mode 100644 index 000000000..a7159cf26 --- /dev/null +++ b/.prettierrc.yaml @@ -0,0 +1,9 @@ +overrides: + # JSON with comments and trailing commas + - files: + - ".vscode/*.json" + - "benchmarks/*.json" + options: + parser: json5 + quoteProps: preserve + singleQuote: false diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index df74e6ea5..ee791203f 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -25,7 +25,7 @@ // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], "build_command": [ "python -m pip install build", - "python -m build --wheel -o {build_cache_dir} {build_dir}" + "python -m build --wheel -o {build_cache_dir} {build_dir}", ], // List of branches to benchmark. If not provided, defaults to "master" @@ -43,7 +43,7 @@ // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. - "environment_type": "virtualenv", + "environment_type": "rattler", // timeout in seconds for installing any dependencies in environment // defaults to 10 min @@ -60,19 +60,19 @@ // dependency packages in the specified order "conda_channels": ["conda-forge", "defaults"], - // The matrix of dependencies to test. Each key is the name of a - // package (in PyPI) and the values are version numbers. An empty - // list or empty string indicates to just test against the default - // (latest) version. null indicates that the package is to not be - // installed. If the package to be tested is only available from - // PyPi, and the 'environment_type' is conda, then you can preface - // the package name by 'pip+', and the package will be installed via - // pip (with all the conda available packages installed first, - // followed by the pip installed packages). - // "matrix": { - "numpy": [""], - "scipy": [""] + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + "req": {}, + // same for env variables + "env": {}, }, // Combinations of libraries/python versions can be excluded/included @@ -122,7 +122,7 @@ // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". - "html_dir": ".asv/html" + "html_dir": ".asv/html", // The number of characters to retain in the commit hashes. // "hash_length": 8, From 8001825b71e10eae09f0f09cf7fa22c2607da14e Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 14 Aug 2025 14:56:17 +0200 Subject: [PATCH 10/11] use uv in install_command --- benchmarks/asv.conf.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index ee791203f..7c13ccdc6 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -21,10 +21,10 @@ // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // - // "install_command": ["python -mpip install {wheel_file}"], + "install_command": ["python -m uv pip install {wheel_file}"], // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], "build_command": [ - "python -m pip install build", + "python -m uv pip install build", "python -m build --wheel -o {build_cache_dir} {build_dir}", ], From d3346ea89f036ef6622c3b72cfbc3a0d46847825 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 14 Aug 2025 14:59:20 +0200 Subject: [PATCH 11/11] back to venv, but try uv --- .github/workflows/benchmark.yml | 2 +- benchmarks/asv.conf.json | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 42f1a15b7..29888e24e 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -52,7 +52,7 @@ jobs: - name: Install dependencies # https://github.com/airspeed-velocity/asv/issues/1493 - run: pip install 'asv@git+https://github.com/airspeed-velocity/asv.git' py-rattler + run: pip install 'asv@git+https://github.com/airspeed-velocity/asv.git' virtualenv - name: Configure ASV working-directory: ${{ env.ASV_DIR }} diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index 7c13ccdc6..a9ab427ae 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -21,12 +21,12 @@ // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // - "install_command": ["python -m uv pip install {wheel_file}"], - // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], "build_command": [ - "python -m uv pip install build", + "python -m pip install uv build", "python -m build --wheel -o {build_cache_dir} {build_dir}", ], + "install_command": ["python -m uv pip install {wheel_file}"], + // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). @@ -43,7 +43,7 @@ // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. - "environment_type": "rattler", + "environment_type": "virtualenv", // timeout in seconds for installing any dependencies in environment // defaults to 10 min