Skip to content

Commit 274600d

Browse files
committed
init
1 parent 349b75a commit 274600d

File tree

6 files changed

+400
-0
lines changed

6 files changed

+400
-0
lines changed

.github/workflows/benchmark.yml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
name: Benchmark
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
branches: [main]
8+
9+
env:
10+
FORCE_COLOR: "1"
11+
12+
defaults:
13+
run:
14+
shell: bash -e {0} # -e to fail on error
15+
16+
jobs:
17+
benchmark:
18+
runs-on: ${{ matrix.os }}
19+
20+
strategy:
21+
fail-fast: false
22+
matrix:
23+
python: ["3.13"]
24+
os: [ubuntu-latest]
25+
26+
env:
27+
OS: ${{ matrix.os }}
28+
PYTHON: ${{ matrix.python }}
29+
ASV_DIR: "./benchmarks"
30+
31+
steps:
32+
- uses: actions/checkout@v4
33+
with:
34+
fetch-depth: 0
35+
36+
- name: Fetch main branch for `asv run`’s hash
37+
run: git fetch origin main:main
38+
if: ${{ github.ref_name != 'main' }}
39+
40+
- name: Set up Python ${{ matrix.python }}
41+
uses: actions/setup-python@v5
42+
with:
43+
python-version: ${{ matrix.python }}
44+
cache: 'pip'
45+
46+
- name: Cache datasets
47+
uses: actions/cache@v4
48+
with:
49+
path: |
50+
~/.cache
51+
key: benchmark-state-${{ hashFiles('benchmarks/**') }}
52+
53+
- name: Install dependencies
54+
run: pip install 'asv>=0.6.4'
55+
56+
- name: Configure ASV
57+
working-directory: ${{ env.ASV_DIR }}
58+
run: asv machine --yes
59+
60+
- name: Quick benchmark run
61+
working-directory: ${{ env.ASV_DIR }}
62+
run: asv run --dry-run --quick --show-stderr --verbose HEAD^!

benchmarks/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Squidpy Benchmarks
2+
3+
This directory contains code for benchmarking Squidpy using [asv][].
4+
5+
The functionality is checked using the [`benchmark.yml`][] workflow.
6+
Benchmarks are run using the [benchmark bot][].
7+
8+
[asv]: https://asv.readthedocs.io/
9+
[`benchmark.yml`]: ../.github/workflows/benchmark.yml
10+
[benchmark bot]: https://github.com/apps/scverse-benchmark
11+
12+
## Data processing in benchmarks
13+
14+
Each dataset is processed so it has
15+
16+
- `.X` (containing data in C/row-major format) and `.layers['off-axis']` (containing data in FORTRAN/column-major format) with log-transformed data
17+
18+
The benchmarks are set up so the `layer` parameter indicates the layer that will be moved into `.X` before the benchmark.
19+
That way, we don’t need to add `layer=layer` everywhere.

benchmarks/asv.conf.json

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
{
2+
// The version of the config file format. Do not change, unless
3+
// you know what you are doing.
4+
"version": 1,
5+
6+
// The name of the project being benchmarked
7+
"project": "squidpy",
8+
9+
// The project's homepage
10+
"project_url": "https://squidpy.readthedocs.io/",
11+
12+
// The URL or local path of the source code repository for the
13+
// project being benchmarked
14+
"repo": "..",
15+
16+
// The Python project's subdirectory in your repo. If missing or
17+
// the empty string, the project is assumed to be located at the root
18+
// of the repository.
19+
// "repo_subdir": "",
20+
21+
// Customizable commands for building, installing, and
22+
// uninstalling the project. See asv.conf.json documentation.
23+
//
24+
// "install_command": ["python -mpip install {wheel_file}"],
25+
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
26+
"build_command": [
27+
"python -m pip install build",
28+
"python -m build --wheel -o {build_cache_dir} {build_dir}",
29+
],
30+
31+
// List of branches to benchmark. If not provided, defaults to "master"
32+
// (for git) or "default" (for mercurial).
33+
"branches": ["main"], // for git
34+
35+
// The DVCS being used. If not set, it will be automatically
36+
// determined from "repo" by looking at the protocol in the URL
37+
// (if remote), or by looking for special directories, such as
38+
// ".git" (if local).
39+
"dvcs": "git",
40+
41+
// The tool to use to create environments. May be "conda",
42+
// "virtualenv" or other value depending on the plugins in use.
43+
// If missing or the empty string, the tool will be automatically
44+
// determined by looking for tools on the PATH environment
45+
// variable.
46+
"environment_type": "conda",
47+
48+
// timeout in seconds for installing any dependencies in environment
49+
// defaults to 10 min
50+
//"install_timeout": 600,
51+
52+
// the base URL to show a commit for the project.
53+
"show_commit_url": "https://github.com/scverse/squidpy/commit/",
54+
55+
// The Pythons you'd like to test against. If not provided, defaults
56+
// to the current version of Python used to run `asv`.
57+
// "pythons": ["3.11", "3.13"],
58+
59+
// The list of conda channel names to be searched for benchmark
60+
// dependency packages in the specified order
61+
"conda_channels": ["conda-forge", "defaults"],
62+
63+
// The matrix of dependencies to test. Each key is the name of a
64+
// package (in PyPI) and the values are version numbers. An empty
65+
// list or empty string indicates to just test against the default
66+
// (latest) version. null indicates that the package is to not be
67+
// installed. If the package to be tested is only available from
68+
// PyPi, and the 'environment_type' is conda, then you can preface
69+
// the package name by 'pip+', and the package will be installed via
70+
// pip (with all the conda available packages installed first,
71+
// followed by the pip installed packages).
72+
//
73+
"matrix": {
74+
"numpy": [""],
75+
"scipy": [""],
76+
"squidpy": [""]
77+
},
78+
79+
// Combinations of libraries/python versions can be excluded/included
80+
// from the set to test. Each entry is a dictionary containing additional
81+
// key-value pairs to include/exclude.
82+
//
83+
// An exclude entry excludes entries where all values match. The
84+
// values are regexps that should match the whole string.
85+
//
86+
// An include entry adds an environment. Only the packages listed
87+
// are installed. The 'python' key is required. The exclude rules
88+
// do not apply to includes.
89+
//
90+
// In addition to package names, the following keys are available:
91+
//
92+
// - python
93+
// Python version, as in the *pythons* variable above.
94+
// - environment_type
95+
// Environment type, as above.
96+
// - sys_platform
97+
// Platform, as in sys.platform. Possible values for the common
98+
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
99+
//
100+
// "exclude": [
101+
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
102+
// {"environment_type": "conda", "six": null}, // don't run without six on conda
103+
// ],
104+
//
105+
// "include": [
106+
// // additional env for python2.7
107+
// {"python": "2.7", "numpy": "1.8"},
108+
// // additional env if run on windows+conda
109+
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
110+
// ],
111+
112+
// The directory (relative to the current directory) that benchmarks are
113+
// stored in. If not provided, defaults to "benchmarks"
114+
// "benchmark_dir": "benchmarks",
115+
116+
// The directory (relative to the current directory) to cache the Python
117+
// environments in. If not provided, defaults to "env"
118+
"env_dir": ".asv/env",
119+
120+
// The directory (relative to the current directory) that raw benchmark
121+
// results are stored in. If not provided, defaults to "results".
122+
"results_dir": ".asv/results",
123+
124+
// The directory (relative to the current directory) that the html tree
125+
// should be written to. If not provided, defaults to "html".
126+
"html_dir": ".asv/html",
127+
128+
// The number of characters to retain in the commit hashes.
129+
// "hash_length": 8,
130+
131+
// `asv` will cache results of the recent builds in each
132+
// environment, making them faster to install next time. This is
133+
// the number of builds to keep, per environment.
134+
// "build_cache_size": 2,
135+
136+
// The commits after which the regression search in `asv publish`
137+
// should start looking for regressions. Dictionary whose keys are
138+
// regexps matching to benchmark names, and values corresponding to
139+
// the commit (exclusive) after which to start looking for
140+
// regressions. The default is to start from the first commit
141+
// with results. If the commit is `null`, regression detection is
142+
// skipped for the matching benchmark.
143+
//
144+
// "regressions_first_commits": {
145+
// "some_benchmark": "352cdf", // Consider regressions only after this commit
146+
// "another_benchmark": null, // Skip regression detection altogether
147+
// },
148+
149+
// The thresholds for relative change in results, after which `asv
150+
// publish` starts reporting regressions. Dictionary of the same
151+
// form as in ``regressions_first_commits``, with values
152+
// indicating the thresholds. If multiple entries match, the
153+
// maximum is taken. If no entry matches, the default is 5%.
154+
//
155+
// "regressions_thresholds": {
156+
// "some_benchmark": 0.01, // Threshold of 1%
157+
// "another_benchmark": 0.5, // Threshold of 50%
158+
// },
159+
}

benchmarks/benchmarks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""ASV benchmark suite for squidpy."""

benchmarks/benchmarks/_utils.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
from __future__ import annotations
2+
3+
import itertools
4+
from functools import cache
5+
from typing import TYPE_CHECKING
6+
7+
import numpy as np
8+
from asv_runner.benchmarks.mark import skip_for_params
9+
from scanpy._compat import CSCBase, CSRBase
10+
11+
import squidpy as sq
12+
13+
if TYPE_CHECKING:
    from collections.abc import Callable, Sequence
    from collections.abc import Set as AbstractSet
    from typing import Literal, Protocol, TypeVar

    from anndata import AnnData

    C = TypeVar("C", bound=Callable)

    class ParamSkipper(Protocol):
        """Signature of the decorator factory returned by ``param_skipper``."""

        def __call__(self, **skipped: AbstractSet) -> Callable[[C], C]: ...

    # Names of datasets the benchmarks can load.
    Dataset = Literal["imc"]
    # Layers that may be promoted into ``.X`` before a benchmark runs.
    KeyX = Literal[None, "off-axis"]
    # Layer holding raw counts (referenced by ``get_count_dataset``);
    # was previously missing, leaving ``KeyCount`` an undefined name.
    KeyCount = Literal["counts"]
27+
28+
29+
@cache
def _imc() -> AnnData:
    """Load the IMC dataset once per process, caching the result."""
    ds = sq.datasets.imc()
    # The benchmarks expect dense, C-contiguous (row-major) expression data.
    assert isinstance(ds.X, np.ndarray)
    assert not np.isfortran(ds.X)
    return ds
36+
37+
38+
def imc() -> AnnData:
    """Return a fresh, mutable copy of the cached IMC dataset."""
    return _imc().copy()
40+
41+
42+
def to_off_axis(x: np.ndarray | CSRBase) -> np.ndarray | CSCBase:
    """Convert *x* to its off-axis (column-major) counterpart.

    A C-ordered dense array becomes a Fortran-ordered copy; a CSR sparse
    matrix becomes CSC. Anything else raises ``TypeError``.
    """
    if isinstance(x, np.ndarray):
        assert not np.isfortran(x)
        return x.copy(order="F")
    if isinstance(x, CSRBase):
        return x.tocsc()
    raise TypeError(f"Unexpected type {type(x)}")
50+
51+
52+
def _get_dataset_raw(dataset: Dataset) -> tuple[AnnData, str | None]:
    """Load *dataset* and attach an off-axis copy of ``.X`` as a layer.

    Returns the dataset together with the name of its clustering column.
    """
    if dataset == "imc":
        adata, cluster_key = imc(), "cell type"
    else:
        msg = f"Unknown dataset {dataset}"
        raise AssertionError(msg)

    # Same data in the opposite memory order, for order-sensitivity benchmarks.
    adata.layers["off-axis"] = to_off_axis(adata.X)
    return adata, cluster_key
63+
64+
65+
def get_dataset(dataset: Dataset, *, layer: KeyX = None) -> tuple[AnnData, str | None]:
    """Return *dataset*, with *layer* (when given) promoted into ``.X``."""
    adata, batch_key = _get_dataset_raw(dataset)
    if layer is None:
        return adata, batch_key
    adata.X = adata.layers.pop(layer)
    return adata, batch_key
70+
71+
72+
def get_count_dataset(dataset: Dataset, *, layer: Literal["counts"] = "counts") -> tuple[AnnData, str | None]:
    """Return *dataset* with the raw-count layer *layer* moved into ``.X``.

    Fix: the original annotation referenced ``KeyCount``, a name never
    defined in this module; ``Literal["counts"]`` spells it out directly
    (annotations are lazy via ``from __future__ import annotations``).
    """
    adata, batch_key = _get_dataset_raw(dataset)

    # NOTE(review): _get_dataset_raw only adds an "off-axis" layer, so this
    # pop assumes the loaded dataset itself provides *layer* — TODO confirm.
    adata.X = adata.layers.pop(layer)
    # remove indicators that X was transformed
    adata.uns.pop("log1p", None)

    return adata, batch_key
80+
81+
82+
def param_skipper(param_names: Sequence[str], params: tuple[Sequence[object], ...]) -> ParamSkipper:
    """Create a decorator that will skip all combinations that contain any of the given parameters.

    Examples
    --------
    >>> param_names = ["letters", "numbers"]
    >>> params = [["a", "b"], [3, 4, 5]]
    >>> skip_when = param_skipper(param_names, params)

    >>> @skip_when(letters={"a"}, numbers={3})
    ... def func(a, b):
    ...     print(a, b)
    >>> run_as_asv_benchmark(func)
    b 4
    b 5

    """

    def skip(**skipped: AbstractSet) -> Callable[[C], C]:
        # Walk the full parameter grid and collect every combination that
        # mentions at least one value listed in *skipped*.
        combos: list[tuple[object, ...]] = []
        for values in itertools.product(*params):
            record = dict(zip(param_names, values, strict=True))
            if any(value in skipped.get(name, set()) for name, value in record.items()):
                combos.append(tuple(record.values()))
        return skip_for_params(combos)

    return skip

0 commit comments

Comments
 (0)