63 changes: 63 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,63 @@
name: Benchmark

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  FORCE_COLOR: "1"

defaults:
  run:
    shell: bash -e {0} # -e to fail on error

jobs:
  benchmark:
    runs-on: ${{ matrix.os }}

    strategy:
      fail-fast: false
      matrix:
        python: ["3.12"]
        os: [ubuntu-latest]

    env:
      OS: ${{ matrix.os }}
      PYTHON: ${{ matrix.python }}
      ASV_DIR: "${{ github.workspace }}/benchmarks"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Fetch main branch for `asv run`’s hash
        run: git fetch origin main:main
        if: ${{ github.ref_name != 'main' }}

      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
          cache: "pip"

      - name: Cache datasets
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache
          key: benchmark-state-${{ hashFiles('benchmarks/**') }}

      - name: Install dependencies
        # https://github.com/airspeed-velocity/asv/issues/1493
        run: pip install 'asv@git+https://github.com/airspeed-velocity/asv.git' virtualenv

      - name: Configure ASV
        working-directory: ${{ env.ASV_DIR }}
        run: asv machine --yes

      - name: Quick benchmark run
        working-directory: ${{ env.ASV_DIR }}
        run: asv run --dry-run --quick --show-stderr --verbose HEAD^!
1 change: 0 additions & 1 deletion .mypy.ini
@@ -1,7 +1,6 @@
[mypy]
mypy_path = squidpy
python_version = 3.10
plugins = numpy.typing.mypy_plugin

ignore_errors = False
warn_redundant_casts = True
9 changes: 9 additions & 0 deletions .prettierrc.yaml
@@ -0,0 +1,9 @@
overrides:
  # JSON with comments and trailing commas
  - files:
      - ".vscode/*.json"
      - "benchmarks/*.json"
    options:
      parser: json5
      quoteProps: preserve
      singleQuote: false
19 changes: 19 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,19 @@
# Squidpy Benchmarks

This directory contains code for benchmarking Squidpy using [asv][].

The functionality is checked using the [`benchmark.yml`][] workflow.
Benchmarks are run using the [benchmark bot][].

[asv]: https://asv.readthedocs.io/
[`benchmark.yml`]: ../.github/workflows/benchmark.yml
[benchmark bot]: https://github.com/apps/scverse-benchmark

## Data processing in benchmarks

Each dataset is processed so that it has

- `.X` (data in C/row-major format) and `.layers['off-axis']` (the same data in Fortran/column-major format) with log-transformed data

The benchmarks are set up so the `layer` parameter indicates the layer that will be moved into `.X` before the benchmark.
That way, we don’t need to add `layer=layer` everywhere.
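For orientation, the following is a minimal sketch (not part of this diff) of how a benchmark module can rely on that convention. Only `get_dataset` from `benchmarks/benchmarks/_utils.py` and squidpy’s public `sq.gr.spatial_neighbors` are assumed; the module, class, and method names are illustrative.

```python
# Hypothetical module benchmarks/benchmarks/spatial_example.py — a sketch only.
from __future__ import annotations

import squidpy as sq

from ._utils import get_dataset


class SpatialNeighborsSuite:
    # ASV runs each `time_*` method once per parameter combination.
    params = (["imc"], [None, "off-axis"])
    param_names = ["dataset", "layer"]

    def setup(self, dataset, layer):
        # `get_dataset` moves the requested layer into `.X`,
        # so the benchmark body needs no `layer=layer` argument.
        self.adata, self.cluster_key = get_dataset(dataset, layer=layer)

    def time_spatial_neighbors(self, dataset, layer):
        sq.gr.spatial_neighbors(self.adata)
```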
158 changes: 158 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,158 @@
{
  // The version of the config file format. Do not change, unless
  // you know what you are doing.
  "version": 1,

  // The name of the project being benchmarked
  "project": "squidpy",

  // The project's homepage
  "project_url": "https://squidpy.readthedocs.io/",

  // The URL or local path of the source code repository for the
  // project being benchmarked
  "repo": "..",

  // The Python project's subdirectory in your repo. If missing or
  // the empty string, the project is assumed to be located at the root
  // of the repository.
  // "repo_subdir": "",

  // Customizable commands for building, installing, and
  // uninstalling the project. See asv.conf.json documentation.
  //
  "build_command": [
    "python -m pip install uv build",
    "python -m build --wheel -o {build_cache_dir} {build_dir}",
  ],
  "install_command": ["python -m uv pip install {wheel_file}"],
  // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],

  // List of branches to benchmark. If not provided, defaults to "master"
  // (for git) or "default" (for mercurial).
  "branches": ["main"], // for git

  // The DVCS being used. If not set, it will be automatically
  // determined from "repo" by looking at the protocol in the URL
  // (if remote), or by looking for special directories, such as
  // ".git" (if local).
  "dvcs": "git",

  // The tool to use to create environments. May be "conda",
  // "virtualenv" or other value depending on the plugins in use.
  // If missing or the empty string, the tool will be automatically
  // determined by looking for tools on the PATH environment
  // variable.
  "environment_type": "virtualenv",

  // timeout in seconds for installing any dependencies in environment
  // defaults to 10 min
  //"install_timeout": 600,

  // the base URL to show a commit for the project.
  "show_commit_url": "https://github.com/scverse/squidpy/commit/",

  // The Pythons you'd like to test against. If not provided, defaults
  // to the current version of Python used to run `asv`.
  // "pythons": ["3.11", "3.13"],

  // The list of conda channel names to be searched for benchmark
  // dependency packages in the specified order
  "conda_channels": ["conda-forge", "defaults"],

  "matrix": {
    // The matrix of dependencies to test. Each key is the name of a
    // package (in PyPI) and the values are version numbers. An empty
    // list or empty string indicates to just test against the default
    // (latest) version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed via
    // pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    "req": {},
    // same for env variables
    "env": {},
  },

  // Combinations of libraries/python versions can be excluded/included
  // from the set to test. Each entry is a dictionary containing additional
  // key-value pairs to include/exclude.
  //
  // An exclude entry excludes entries where all values match. The
  // values are regexps that should match the whole string.
  //
  // An include entry adds an environment. Only the packages listed
  // are installed. The 'python' key is required. The exclude rules
  // do not apply to includes.
  //
  // In addition to package names, the following keys are available:
  //
  // - python
  //     Python version, as in the *pythons* variable above.
  // - environment_type
  //     Environment type, as above.
  // - sys_platform
  //     Platform, as in sys.platform. Possible values for the common
  //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
  //
  // "exclude": [
  //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
  //     {"environment_type": "conda", "six": null}, // don't run without six on conda
  // ],
  //
  // "include": [
  //     // additional env for python2.7
  //     {"python": "2.7", "numpy": "1.8"},
  //     // additional env if run on windows+conda
  //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
  // ],

  // The directory (relative to the current directory) that benchmarks are
  // stored in. If not provided, defaults to "benchmarks"
  // "benchmark_dir": "benchmarks",

  // The directory (relative to the current directory) to cache the Python
  // environments in. If not provided, defaults to "env"
  "env_dir": ".asv/env",

  // The directory (relative to the current directory) that raw benchmark
  // results are stored in. If not provided, defaults to "results".
  "results_dir": ".asv/results",

  // The directory (relative to the current directory) that the html tree
  // should be written to. If not provided, defaults to "html".
  "html_dir": ".asv/html",

  // The number of characters to retain in the commit hashes.
  // "hash_length": 8,

  // `asv` will cache results of the recent builds in each
  // environment, making them faster to install next time. This is
  // the number of builds to keep, per environment.
  // "build_cache_size": 2,

  // The commits after which the regression search in `asv publish`
  // should start looking for regressions. Dictionary whose keys are
  // regexps matching to benchmark names, and values corresponding to
  // the commit (exclusive) after which to start looking for
  // regressions. The default is to start from the first commit
  // with results. If the commit is `null`, regression detection is
  // skipped for the matching benchmark.
  //
  // "regressions_first_commits": {
  //     "some_benchmark": "352cdf", // Consider regressions only after this commit
  //     "another_benchmark": null, // Skip regression detection altogether
  // },

  // The thresholds for relative change in results, after which `asv
  // publish` starts reporting regressions. Dictionary of the same
  // form as in ``regressions_first_commits``, with values
  // indicating the thresholds. If multiple entries match, the
  // maximum is taken. If no entry matches, the default is 5%.
  //
  // "regressions_thresholds": {
  //     "some_benchmark": 0.01, // Threshold of 1%
  //     "another_benchmark": 0.5, // Threshold of 50%
  // },
}
1 change: 1 addition & 0 deletions benchmarks/benchmarks/__init__.py
@@ -0,0 +1 @@
"""ASV benchmark suite for squidpy."""
99 changes: 99 additions & 0 deletions benchmarks/benchmarks/_utils.py
@@ -0,0 +1,99 @@
from __future__ import annotations

import itertools
from functools import cache
from typing import TYPE_CHECKING

import numpy as np
from asv_runner.benchmarks.mark import skip_for_params
from scipy.sparse import csc_matrix, csr_matrix

import squidpy as sq

if TYPE_CHECKING:
    from collections.abc import Callable, Sequence
    from collections.abc import Set as AbstractSet
    from typing import Literal, Protocol, TypeVar

    from anndata import AnnData

    C = TypeVar("C", bound=Callable)  # type: ignore[type-arg]

    class ParamSkipper(Protocol):
        def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ...  # type: ignore[type-arg]

    Dataset = Literal["imc"]
    KeyX = Literal[None, "off-axis"]


@cache
def _imc() -> AnnData:
    adata = sq.datasets.imc()  # type: ignore[attr-defined]
    assert isinstance(adata.X, np.ndarray)
    assert not np.isfortran(adata.X)

    return adata


def imc() -> AnnData:
    return _imc().copy()


def to_off_axis(x: np.ndarray | csr_matrix | csc_matrix) -> np.ndarray | csc_matrix:
    if isinstance(x, csr_matrix):
        return x.tocsc()
    if isinstance(x, np.ndarray):
        assert not np.isfortran(x)
        return x.copy(order="F")
    msg = f"Unexpected type {type(x)}"
    raise TypeError(msg)


def _get_dataset_raw(dataset: Dataset) -> tuple[AnnData, str | None]:
    match dataset:
        case "imc":
            adata, cluster_key = imc(), "cell type"
        case _:
            msg = f"Unknown dataset {dataset}"
            raise AssertionError(msg)

    adata.layers["off-axis"] = to_off_axis(adata.X)

    return adata, cluster_key


def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | None]:
    adata, batch_key = _get_dataset_raw(dataset)
    if layer is not None:
        adata.X = adata.layers.pop(layer)
    return adata, batch_key


def param_skipper(param_names: Sequence[str], params: tuple[Sequence[object], ...]) -> ParamSkipper:
    """Create a decorator that will skip all combinations that contain any of the given parameters.

    Examples
    --------
    >>> param_names = ["letters", "numbers"]
    >>> params = [["a", "b"], [3, 4, 5]]
    >>> skip_when = param_skipper(param_names, params)

    >>> @skip_when(letters={"a"}, numbers={3})
    ... def func(a, b):
    ...     print(a, b)
    >>> run_as_asv_benchmark(func)
    b 4
    b 5

    """

    def skip(**skipped: AbstractSet) -> Callable[[C], C]:  # type: ignore[type-arg]
        skipped_combs = [
            tuple(record.values())
            for record in (dict(zip(param_names, vals, strict=True)) for vals in itertools.product(*params))
            if any(v in skipped.get(n, set()) for n, v in record.items())
        ]
        # print(skipped_combs, file=sys.stderr)
        return skip_for_params(skipped_combs)  # type: ignore[no-any-return]

    return skip
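For context, here is a hypothetical sketch (not part of this diff) of how a benchmark module might combine `get_dataset` and `param_skipper`. The module-level ASV parametrization and the `time_co_occurrence` benchmark name are illustrative assumptions; `sq.gr.co_occurrence` is squidpy’s public API.

```python
# Hypothetical benchmarks/benchmarks/example_tool.py — a usage sketch only.
from __future__ import annotations

import squidpy as sq

from ._utils import get_dataset, param_skipper

params = (["imc"], [None, "off-axis"])
param_names = ["dataset", "layer"]

skip_when = param_skipper(param_names, params)


def setup(dataset, layer):
    # ASV calls this once per parameter combination before timing.
    global adata, cluster_key
    adata, cluster_key = get_dataset(dataset, layer=layer)


@skip_when(layer={"off-axis"})  # e.g. skip a data layout the function cannot handle
def time_co_occurrence(dataset, layer):
    sq.gr.co_occurrence(adata, cluster_key=cluster_key)
```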