Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions asv_benchmarks/benchmarks/fastcan.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,7 @@ def setup_cache(self):
else:
eta = False
beam_width = 10
estimator = FastCan(
n_features_to_select=20,
eta=eta,
beam_width=beam_width
)
estimator = FastCan(n_features_to_select=20, eta=eta, beam_width=beam_width)
estimator.fit(X, y)

est_path = get_estimator_path(self, params)
Expand Down
8 changes: 2 additions & 6 deletions fastcan/_beam.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,14 @@ def _beam_search(

for i in range(n_features_to_select - n_inclusions):
if i == 0:
mask, X_selected = _prepare_candidates(
X, mask_exclude, indices_include
)
mask, X_selected = _prepare_candidates(X, mask_exclude, indices_include)
if X_selected.shape[1] == 0:
beams_scores = np.sum((X.T @ V) ** 2, axis=1)
beams_scores[mask] = 0
else:
W_selected = orth(X_selected)
selected_score = np.sum((W_selected.T @ V) ** 2)
beams_scores = _mgs_ssc(
X, V, W_selected, mask, selected_score, tol
)
beams_scores = _mgs_ssc(X, V, W_selected, mask, selected_score, tol)
beams_selected_ids = [indices_include for _ in range(beam_width)]
beams_selected_ids, top_k_scores = _select_top_k(
beams_scores[None, :],
Expand Down
15 changes: 13 additions & 2 deletions fastcan/_minibatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
# Authors: The fastcan developers
# SPDX-License-Identifier: MIT

import warnings
from numbers import Integral, Real

import numpy as np
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
from sklearn.utils._param_validation import Interval, validate_params
from sklearn.utils.validation import check_X_y

from ._beam import _safe_normalize
from ._cancorr_fast import _greedy_search # type: ignore[attr-defined]
from ._fastcan import _prepare_search

Expand Down Expand Up @@ -101,11 +103,18 @@ def minibatch(X, y, n_features_to_select=1, batch_size=1, tol=0.01, verbose=1):
)
)
X_transformed_ = X - X.mean(0)
y_transformed_ = y - y.mean(0)
y_transformed_, const_mask = _safe_normalize(y - y.mean(0))
if const_mask.any():
warnings.warn(
f"Contain constant targets, whose indices are {np.where(const_mask)[0]}.",
UserWarning,
)
indices_include = np.zeros(0, dtype=int) # just an empty array
indices_select = np.zeros(0, dtype=int)

for i in range(n_outputs):
if const_mask[i]:
continue
y_i = y_transformed_[:, [i]]
n_selected_i = 0
while n_to_select_split[i] > n_selected_i:
Expand Down Expand Up @@ -137,7 +146,9 @@ def minibatch(X, y, n_features_to_select=1, batch_size=1, tol=0.01, verbose=1):
n_selected_i += batch_size_temp
if verbose == 1:
print(
f"Progress: {indices_select.size}/{n_features_to_select}", end="\r"
f"Progress: {indices_select.size}/{n_features_to_select}, "
f"Batch SSC: {scores.sum():.5f}",
end="\r",
)
if verbose == 1:
print()
Expand Down
4 changes: 2 additions & 2 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ scikit-learn = ">=1.6.0"
fastcan = { path = ".", editable = true }

[tasks]
time-h = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(3000, 100); y = np.random.rand(3000, 20)' 's = FastCan(100, verbose=0).fit(X, y)'"
time-eta = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(3000, 100); y = np.random.rand(3000, 20)' 's = FastCan(100, eta=True, verbose=0).fit(X, y)'"
time-h = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(30000, 100); y = np.random.rand(30000, 20)' 's = FastCan(100, verbose=0).fit(X, y)'"
time-eta = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(30000, 100); y = np.random.rand(30000, 20)' 's = FastCan(100, eta=True, verbose=0).fit(X, y)'"
profile-minibatch = { cmd = '''python -c "import cProfile; import numpy as np; from fastcan import minibatch; X = np.random.rand(100, 3000); y = np.random.rand(100, 20); cProfile.run('minibatch(X, y, 1000, 10, verbose=0)', sort='{{ SORT }}')"''', args = [{ arg = "SORT", default = "cumtime" }] }
time-narx = '''python -m timeit -n 1 -s "import numpy as np; from fastcan.narx import make_narx; rng = np.random.default_rng(5); X = rng.random((1000, 10)); y = rng.random((1000, 2)); m = make_narx(X, y, 10, max_delay=2, poly_degree=2, verbose=0)" "m.fit(X, y, coef_init='one_step_ahead', verbose=1)"'''
profile-narx = { cmd = '''python -c "import cProfile; import numpy as np; from fastcan.narx import make_narx; rng = np.random.default_rng(8); X = rng.random((3000, 3)); y = rng.random((3000, 3)); m = make_narx(X, y, 10, max_delay=10, poly_degree=2, verbose=0); cProfile.run('m.fit(X, y, coef_init=[0]*33)', sort='{{ SORT }}')"''', args = [{ arg = "SORT", default = "tottime" }] }
Expand Down
39 changes: 37 additions & 2 deletions tests/test_minibatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import numpy as np
import pytest
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris, make_classification
from sklearn.datasets import load_iris, make_classification, make_regression
from sklearn.preprocessing import OneHotEncoder

from fastcan import minibatch
from fastcan.utils import ssc


def test_data_pruning():
Expand Down Expand Up @@ -60,7 +61,7 @@ def test_select_minibatch_cls():
assert indices.size == n_to_select


def test_minibatch_error():
def test_minibatch_error_warning():
# Test that minibatch raises errors and warnings on invalid input.
n_samples = 200
n_features = 20
Expand All @@ -83,3 +84,37 @@ def test_minibatch_error():

with pytest.raises(ValueError, match=r"n_features_to_select .*"):
_ = minibatch(X, y, n_features + 1, batch_size=3)

Y = OneHotEncoder(sparse_output=False).fit_transform(y.reshape(-1, 1))
Y[:, 0] = 1
with pytest.warns(
UserWarning, match=r"Contain constant targets, whose indices are .*"
):
_ = minibatch(X, Y, 5, batch_size=3)


def test_minibatch_ssc_aligned(capsys):
    """Check that the batch SSC printed by minibatch matches the true SSC."""
    n_targets = 5
    n_to_select = 10
    # The number of features selected per target is n_to_select // n_targets,
    # and the last batch of selected features belongs to the last target.
    n_features_per_target = n_to_select // n_targets

    X, y = make_regression(
        n_samples=100,
        n_features=20,
        n_informative=10,
        n_targets=n_targets,
        noise=0.1,
        random_state=0,
    )

    indices = minibatch(X, y, n_to_select, batch_size=n_features_per_target + 1)
    printed = capsys.readouterr().out

    # Recompute the score for the last target from the trailing selection.
    last_batch = X[:, indices[-n_features_per_target:]]
    last_target = y[:, [-1]]
    gtruth_ssc = ssc(last_batch, last_target)

    expected_line = (
        f"Progress: {n_to_select}/{n_to_select}, "
        f"Batch SSC: {gtruth_ssc:.5f}"
    )
    assert expected_line in printed