Skip to content

Commit 8e4a01f

Browse files
Merge pull request #24 from MatthewSZhang/minibatch
MNT change extend to minibatch
2 parents b53076a + 9448afb commit 8e4a01f

File tree

10 files changed

+480
-481
lines changed

10 files changed

+480
-481
lines changed

.readthedocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 2
22
build:
33
os: ubuntu-22.04
44
tools:
5-
python: "3.12"
5+
python: "3.13"
66

77
sphinx:
88
configuration: doc/conf.py

doc/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ API Reference
1919

2020
FastCan
2121
refine
22-
extend
22+
minibatch
2323
ssc
2424
ols
2525
make_poly_ids

fastcan/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
The :mod:`fastcan` module implements algorithms, including
33
"""
44

5-
from ._extend import extend
65
from ._fastcan import FastCan
6+
from ._minibatch import minibatch
77
from ._narx import (
88
Narx,
99
make_narx,
@@ -21,7 +21,7 @@
2121
"ssc",
2222
"ols",
2323
"refine",
24-
"extend",
24+
"minibatch",
2525
"make_narx",
2626
"print_narx",
2727
"Narx",

fastcan/_extend.py

Lines changed: 0 additions & 120 deletions
This file was deleted.

fastcan/_fastcan.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,6 @@ def _get_support_mask(self):
287287

288288

289289
def _prepare_search(n_features, n_features_to_select, indices_include, indices_exclude):
290-
""" """
291290
# initiated with -1
292291
indices = np.full(n_features_to_select, -1, dtype=np.intc, order="F")
293292
indices[: indices_include.size] = indices_include

fastcan/_minibatch.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""
2+
Feature selection with mini-batch
3+
"""
4+
5+
from copy import deepcopy
6+
from numbers import Integral
7+
8+
import numpy as np
9+
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
10+
from sklearn.utils._param_validation import Interval, validate_params
11+
from sklearn.utils.validation import check_X_y
12+
13+
from ._cancorr_fast import _forward_search # type: ignore
14+
from ._fastcan import FastCan, _prepare_search
15+
16+
17+
@validate_params(
    {
        "X": ["array-like"],
        "y": ["array-like"],
        "n_features_to_select": [
            Interval(Integral, 1, None, closed="left"),
        ],
        "batch_size": [
            Interval(Integral, 1, None, closed="left"),
        ],
        "verbose": ["verbose"],
    },
    prefer_skip_nested_validation=False,
)
def minibatch(X, y, n_features_to_select=1, batch_size=1, verbose=1):
    """FastCan selection with mini batches.

    It is suitable for selecting a very large number of features
    even larger than the number of samples.

    Similar to the correlation filter which selects each feature without considering
    the redundancy, the function selects features in mini-batch and the
    redundancy between the two mini-batches will be ignored.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.

    y : array-like of shape (n_samples, n_outputs)
        Target matrix.

    n_features_to_select : int, default=1
        The parameter is the absolute number of features to select.

    batch_size : int, default=1
        The number of features in a mini-batch.
        It is recommended that batch_size be less than n_samples.

    verbose : int, default=1
        The verbosity level.

    Returns
    -------
    indices : ndarray of shape (n_features_to_select,), dtype=int
        The indices of the selected features.

    Examples
    --------
    >>> from fastcan import minibatch
    >>> X = [[1, 1, 0], [0.01, 0, 0], [-1, 0, 1], [0, 0, 0]]
    >>> y = [1, 0, -1, 0]
    >>> indices = minibatch(X, y, 3, batch_size=2, verbose=0)
    >>> print(f"Indices: {indices}")
    Indices: [0 1 2]
    """
    X, y = check_X_y(X, y, ensure_2d=True, multi_output=True)
    # Normalize y to 2-D so the per-output loop below works for single-output
    # targets as well.
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    n_features = X.shape[1]
    n_outputs = y.shape[1]

    if n_features_to_select > n_features:
        raise ValueError(
            f"n_features_to_select {n_features_to_select} "
            f"must be <= n_features {n_features}."
        )

    n_threads = _openmp_effective_n_threads()

    # Split the total selection budget as evenly as possible across the
    # n_outputs targets: linspace over [0, n_features_to_select] with integer
    # dtype gives cumulative boundaries, diff gives per-output counts.
    n_to_select_split = np.diff(
        np.linspace(
            0, n_features_to_select, num=n_outputs + 1, endpoint=True, dtype=int
        )
    )
    # Accumulator for all selected feature indices, grown batch by batch.
    indices_select = np.zeros(0, dtype=int)
    for i in range(n_outputs):
        y_i = y[:, i]
        # Chop this output's budget into mini-batches of size batch_size;
        # the trailing batch may be smaller (diff of the arange plus the
        # endpoint). If the budget is 0, this is empty and the loop is skipped.
        batch_split_i = np.diff(
            np.r_[
                np.arange(n_to_select_split[i], step=batch_size, dtype=int),
                n_to_select_split[i],
            ]
        )
        for j, batch_size_j in enumerate(batch_split_i):
            if j == 0:
                # First batch for this output: run a full FastCan fit,
                # excluding everything already selected for earlier outputs.
                selector_j = FastCan(
                    batch_size_j, indices_exclude=indices_select, verbose=0
                ).fit(X, y_i)
                # Copy the transformed design matrix: _forward_search below
                # mutates X in place, so later batches need a fresh copy of
                # the fitted state. TODO confirm X_transformed_ is mutated.
                X_transformed_ = deepcopy(selector_j.X_transformed_)
                indices = selector_j.indices_
            else:
                # Subsequent batches: skip the full fit and reuse the fitted
                # selector's transformed data, masking out all indices already
                # selected (both this output's and earlier outputs').
                indices, scores, mask = _prepare_search(
                    n_features,
                    batch_size_j,
                    selector_j.indices_include_,
                    np.r_[selector_j.indices_exclude_, indices_select],
                )
                # Greedy forward search (Cython); fills `indices` and `scores`
                # in place for this batch.
                _forward_search(
                    X=X_transformed_,
                    V=selector_j.y_transformed_,
                    t=batch_size_j,
                    tol=selector_j.tol,
                    num_threads=n_threads,
                    verbose=0,
                    mask=mask,
                    indices=indices,
                    scores=scores,
                )
            indices_select = np.r_[indices_select, indices]
            if verbose == 1:
                # Overwrite the same console line with running progress.
                print(
                    f"Progress: {indices_select.size}/{n_features_to_select}", end="\r"
                )
    if verbose == 1:
        # Terminate the carriage-return progress line.
        print()
    return indices_select

fastcan/_narx.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ def fit(self, X, y, coef_init=None, **params):
453453
# fit a one-step-ahead Narx model
454454
xy_hstack = np.c_[X, y]
455455
osa_narx = LinearRegression()
456-
time_shift_terms = make_time_shift_features(xy_hstack, self.time_shift_ids_)
457-
poly_terms = make_poly_features(time_shift_terms, self.poly_ids_)
456+
time_shift_vars = make_time_shift_features(xy_hstack, self.time_shift_ids_)
457+
poly_terms = make_poly_features(time_shift_vars, self.poly_ids_)
458458

459459
osa_narx.fit(poly_terms, y)
460460
if coef_init is None:
@@ -644,6 +644,7 @@ def print_narx(
644644
| X[k-0,9] | 68 |
645645
"""
646646
check_is_fitted(narx)
647+
647648
def _get_variable_str(time_shift_id):
648649
if time_shift_id[0] < narx.n_features_in_:
649650
variable_str = f"X[k-{time_shift_id[1]},{time_shift_id[0]}]"
@@ -822,13 +823,13 @@ def make_narx(
822823
),
823824
0,
824825
)
825-
time_shift_terms = make_time_shift_features(xy_hstack, time_shift_ids_all)
826+
time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids_all)
826827

827828
poly_ids_all = make_poly_ids(
828829
time_shift_ids_all.shape[0],
829830
poly_degree,
830831
)
831-
poly_terms = make_poly_features(time_shift_terms, poly_ids_all)
832+
poly_terms = make_poly_features(time_shift_vars, poly_ids_all)
832833

833834
csf = FastCan(
834835
n_features_to_select,

0 commit comments

Comments
 (0)