Commit c1c5120

MNT add asv benchmarks
Parent: 1987a1a

12 files changed: +4067 −1083 lines


.github/workflows/asv.yml

Lines changed: 96 additions & 0 deletions
name: ASV benchmarks

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  run-benchmarks:
    name: Benchmark on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    permissions:
      contents: read
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest, ubuntu-24.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: prefix-dev/[email protected]
        with:
          environments: dev
          cache: true

      - name: Run benchmarks
        shell: bash
        run: |
          MACHINE=${{ matrix.os }} pixi run asv-build

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: asv-results-${{ matrix.os }}
          path: asv_benchmarks/results

  publish-report:
    name: Build HTML report
    runs-on: ubuntu-latest
    needs: run-benchmarks
    permissions:
      contents: write

    steps:
      - uses: actions/checkout@v4

      - name: Prepare previous ASV results
        uses: actions/checkout@v4
        with:
          ref: gh-pages
          path: gh-pages

      - name: Copy previous results
        run: |
          mkdir -p asv_benchmarks/results
          cp -r gh-pages/results/* asv_benchmarks/results/ 2>/dev/null || true

      - name: Download all benchmark results
        uses: actions/download-artifact@v5
        with:
          pattern: asv-results-*

      - name: Merge new benchmark results
        run: |
          for d in asv-results-*; do
            [ -d "$d" ] || continue
            cp -r "$d"/* "asv_benchmarks/results/"
          done

      - uses: prefix-dev/[email protected]
        with:
          environments: dev
          cache: true

      - name: Generate HTML report
        run: |
          pixi run asv-publish

      - name: Copy results to publish directory
        run: |
          cp -r asv_benchmarks/results html/

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v4
        if: github.event_name == 'push'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./html
          keep_files: true
          user_name: 'github-actions[bot]'
          user_email: 'github-actions[bot]@users.noreply.github.com'
          commit_message: ${{ github.event.head_commit.message }}

.github/workflows/publish-pypi.yml

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@ jobs:
       id-token: write
     steps:
     - name: Download artifacts
-      uses: actions/download-artifact@v4
+      uses: actions/download-artifact@v5
       with:
         path: dist/
         merge-multiple: true
@@ -27,9 +27,9 @@ jobs:
     - name: Publish distribution to PyPI
       if: github.event.release.prerelease == false
       uses: pypa/gh-action-pypi-publish@release/v1
-
+
     - name: get wasm dist artifacts
-      uses: actions/download-artifact@v4
+      uses: actions/download-artifact@v5
       with:
         name: wasm_wheel
        path: wasm/

asv_benchmarks/.gitignore

Lines changed: 5 additions & 0 deletions
*__pycache__*
env/
html/
results/
benchmarks/cache/

asv_benchmarks/asv.conf.json

Lines changed: 18 additions & 0 deletions
{
    "version": 1,
    "project": "fastcan",
    "project_url": "https://github.com/scikit-learn-contrib/fastcan",
    "show_commit_url": "https://github.com/scikit-learn-contrib/fastcan/commit/",
    "repo": "..",
    "branches": ["main"],
    "environment_type": "conda",
    "conda_channels": ["conda-forge"],
    "build_command": ["python -m build --wheel -o {build_cache_dir} {build_dir}"],
    "install_command": ["python -mpip install {wheel_file}"],
    "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
    "pythons": ["3.13"],
    "matrix": {
        "scikit-learn": [""],
        "pandas": [""]
    }
}
Lines changed: 1 addition & 0 deletions
"""Benchmark suite for fastcan using ASV"""
Lines changed: 53 additions & 0 deletions
import pickle
import timeit
from abc import ABC, abstractmethod
from pathlib import Path


def get_estimator_path(benchmark, params):
    """Get the path of the pickled fitted estimator."""
    path = Path(__file__).resolve().parent / "cache" / "estimators"

    filename = (
        benchmark.__class__.__name__
        + "_estimator_"
        + "_".join(map(str, params))
        + ".pkl"
    )

    return path / filename


class Benchmark(ABC):
    """Abstract base class for all the benchmarks."""

    timer = timeit.default_timer  # wall time
    timeout = 500

    # Directories for cached datasets and saved estimators.
    current_path = Path(__file__).resolve().parent
    cache_path = current_path / "cache"
    cache_path.mkdir(exist_ok=True)
    (cache_path / "estimators").mkdir(exist_ok=True)

    def setup(self, *params):
        """Generate the dataset and load the fitted estimator."""
        # This is run once per combination of parameters and per repeat, so
        # expensive operations must be avoided here.
        self.X, self.X_val, self.y, self.y_val = self.make_data(params)

        est_path = get_estimator_path(self, params)
        with est_path.open(mode="rb") as f:
            self.estimator = pickle.load(f)

    @abstractmethod
    def make_data(self, params):
        """Return the dataset for a combination of parameters."""
        # The datasets are cached using joblib.Memory, so this is fast and
        # can be called for each repeat.

    @property
    @abstractmethod
    def params(self):
        pass
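As a quick illustration (hypothetical, not part of the commit), get_estimator_path simply joins the benchmark's class name with the stringified parameter tuple; DummyBenchmark below exists only to show the resulting file name:

# Hypothetical sketch of get_estimator_path's naming scheme;
# "DummyBenchmark" does not appear in the commit.
class DummyBenchmark:
    pass

path = get_estimator_path(DummyBenchmark(), ("classif", "h"))
print(path.name)  # DummyBenchmark_estimator_classif_h.pkl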
Lines changed: 56 additions & 0 deletions
from pathlib import Path

import numpy as np
from joblib import Memory
from sklearn.datasets import (
    fetch_openml,
    load_digits,
    make_regression,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler, StandardScaler

# Memory location for caching datasets
M = Memory(location=str(Path(__file__).resolve().parent / "cache"))


@M.cache
def _digits_dataset(n_samples=None, dtype=np.float32):
    X, y = load_digits(return_X_y=True)
    X = X.astype(dtype, copy=False)
    X = MaxAbsScaler().fit_transform(X)
    X = X[:n_samples]
    y = y[:n_samples]

    X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
    return X, X_val, y, y_val


@M.cache
def _synth_regression_dataset(n_samples=10000, n_features=200, dtype=np.float32):
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_features // 10,
        noise=50,
        random_state=0,
    )
    X = X.astype(dtype, copy=False)
    X = StandardScaler().fit_transform(X)

    X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
    return X, X_val, y, y_val


@M.cache
def _co2_dataset(dtype=np.float32):
    X, y = fetch_openml(data_id=41187, return_X_y=True, as_frame=False)
    X = X[:, [1, 3]]
    X = X.astype(dtype, copy=False)
    n_samples = len(y)
    n_test = int(n_samples * 0.1)

    mask_train = np.arange(n_samples) < (n_samples - n_test)
    X, X_val = X[mask_train], X[~mask_train]
    y_train, y_val = y[mask_train], y[~mask_train]
    return X, X_val, y_train, y_val
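Because every loader above is wrapped in @M.cache, only the first call pays for fetching or generating a dataset; later calls, including those from fresh ASV worker processes, reload the pickled result from the on-disk cache. A minimal sketch of the same joblib.Memory pattern (the names slow_square and cache_demo are assumptions of this sketch):

import time

from joblib import Memory

mem = Memory(location="cache_demo")  # hypothetical cache directory

@mem.cache
def slow_square(n):
    time.sleep(1)  # stand-in for expensive dataset generation
    return n * n

slow_square(12)  # computed once, then persisted to disk
slow_square(12)  # served from the cache, near-instant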
Lines changed: 85 additions & 0 deletions
import itertools
import pickle

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LinearRegression

from fastcan import FastCan

from .common import Benchmark, get_estimator_path
from .datasets import _digits_dataset, _synth_regression_dataset


class FastCanBenchmark(Benchmark):
    """Benchmarks for FastCan."""

    param_names = ["task", "alg"]
    params = (["classif", "reg"], ["h", "eta"])

    def setup_cache(self):
        """Pickle a fitted estimator for all combinations of parameters"""
        # This is run once per benchmark class.
        param_grid = list(itertools.product(*self.params))

        for params in param_grid:
            _, alg = params
            X, _, y, _ = self.make_data(params)

            estimator = FastCan(
                n_features_to_select=20,
                eta=(alg == "eta"),
            )
            estimator.fit(X, y)

            est_path = get_estimator_path(self, params)
            with est_path.open(mode="wb") as f:
                pickle.dump(estimator, f)

    def make_data(self, params):
        task, _ = params
        if task == "classif":
            return _digits_dataset()
        return _synth_regression_dataset()

    def time_fit(self, *args):
        self.estimator.fit(self.X, self.y)

    def peakmem_fit(self, *args):
        self.estimator.fit(self.X, self.y)

    def track_train_score(self, *args):
        task, _ = args
        X_t = self.estimator.transform(self.X)
        if task == "classif":
            clf = LinearDiscriminantAnalysis()
            clf.fit(X_t, self.y)
            return float(clf.score(X_t, self.y))
        reg = LinearRegression()
        reg.fit(X_t, self.y)
        return float(reg.score(X_t, self.y))

    def track_test_score(self, *args):
        task, _ = args
        X_t = self.estimator.transform(self.X_val)
        if task == "classif":
            clf = LinearDiscriminantAnalysis()
            clf.fit(X_t, self.y_val)
            return float(clf.score(X_t, self.y_val))
        reg = LinearRegression()
        reg.fit(X_t, self.y_val)
        return float(reg.score(X_t, self.y_val))

    def time_transform(self, *args):
        self.estimator.transform(self.X)

    def peakmem_transform(self, *args):
        self.estimator.transform(self.X)
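For local debugging outside the asv runner, the class can also be driven by hand. This sketch (hypothetical, not part of the commit) mirrors the setup_cache → setup → benchmark sequence that ASV performs for one parameter combination:

# Hypothetical smoke test: run the ("classif", "h") combination manually.
bench = FastCanBenchmark()
bench.setup_cache()             # fits and pickles estimators for all param combos
bench.setup("classif", "h")     # loads the data and the pickled estimator
bench.time_fit("classif", "h")  # the statement ASV would time
print(bench.track_test_score("classif", "h"))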
