Skip to content

Commit 3aecd2b

Browse files
committed
FEAT add param fit_intercept for NARX
1 parent be0dc25 commit 3aecd2b

File tree

5 files changed

+176
-15
lines changed

5 files changed

+176
-15
lines changed

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99

1010
steps:
1111
- uses: actions/checkout@v4
12-
- uses: prefix-dev/[email protected].4
12+
- uses: prefix-dev/[email protected].8
1313
with:
1414
environments: default
1515
cache: true

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121

2222
steps:
2323
- uses: actions/checkout@v4
24-
- uses: prefix-dev/[email protected].3
24+
- uses: prefix-dev/[email protected].8
2525
with:
2626
environments: default
2727
cache: true

fastcan/narx.py

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,8 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
570570
The id numbers indicate which output the polynomial term belongs to.
571571
It is useful in multi-output case.
572572
573+
fit_intercept : bool, default=True
574+
Whether to fit the intercept. If set to False, intercept will be zeros.
573575
574576
Attributes
575577
----------
@@ -641,6 +643,7 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
641643
"feat_ids": [None, "array-like"],
642644
"delay_ids": [None, "array-like"],
643645
"output_ids": [None, "array-like"],
646+
"fit_intercept": ["boolean"],
644647
}
645648

646649
def __init__(
@@ -649,10 +652,12 @@ def __init__(
649652
feat_ids=None,
650653
delay_ids=None,
651654
output_ids=None,
655+
fit_intercept=True,
652656
):
653657
self.feat_ids = feat_ids
654658
self.delay_ids = delay_ids
655659
self.output_ids = output_ids
660+
self.fit_intercept = fit_intercept
656661

657662
@validate_params(
658663
{
@@ -769,13 +774,12 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
769774
)
770775

771776
self.max_delay_ = self.delay_ids_.max()
772-
n_coef_intercept = n_terms + self.n_outputs_
773777

774778
if isinstance(coef_init, (type(None), str)):
775779
# fit a one-step-ahead NARX model
776780
time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_)
777781
xy_hstack = np.c_[X, y]
778-
osa_narx = LinearRegression()
782+
osa_narx = LinearRegression(fit_intercept=self.fit_intercept)
779783
time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids)
780784
poly_terms = make_poly_features(time_shift_vars, poly_ids)
781785
# Remove missing values
@@ -787,7 +791,10 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
787791
for i in range(self.n_outputs_):
788792
output_i_mask = self.output_ids_ == i
789793
if np.sum(output_i_mask) == 0:
790-
intercept[i] = np.mean(y_masked[:, i])
794+
if self.fit_intercept:
795+
intercept[i] = np.mean(y_masked[:, i])
796+
else:
797+
intercept[i] = 0.0
791798
continue
792799
osa_narx.fit(
793800
poly_terms_masked[:, output_i_mask],
@@ -801,13 +808,20 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
801808
self.intercept_ = intercept
802809
return self
803810

804-
coef_init = np.r_[coef, intercept]
811+
if self.fit_intercept:
812+
coef_init = np.r_[coef, intercept]
813+
else:
814+
coef_init = coef
805815
else:
806816
coef_init = check_array(
807817
coef_init,
808818
ensure_2d=False,
809819
dtype=float,
810820
)
821+
if self.fit_intercept:
822+
n_coef_intercept = n_terms + self.n_outputs_
823+
else:
824+
n_coef_intercept = n_terms
811825
if coef_init.shape[0] != n_coef_intercept:
812826
raise ValueError(
813827
"`coef_init` should have the shape of "
@@ -829,6 +843,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
829843
self.feat_ids_,
830844
self.delay_ids_,
831845
self.output_ids_,
846+
self.fit_intercept,
832847
sample_weight_sqrt,
833848
grad_yyd_ids,
834849
grad_coef_ids,
@@ -837,8 +852,12 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
837852
),
838853
**params,
839854
)
840-
self.coef_ = res.x[: -self.n_outputs_]
841-
self.intercept_ = res.x[-self.n_outputs_ :]
855+
if self.fit_intercept:
856+
self.coef_ = res.x[: -self.n_outputs_]
857+
self.intercept_ = res.x[-self.n_outputs_ :]
858+
else:
859+
self.coef_ = res.x
860+
self.intercept_ = np.zeros(self.n_outputs_, dtype=float)
842861
return self
843862

844863
@staticmethod
@@ -931,6 +950,7 @@ def _update_dydx(
931950
feat_ids,
932951
delay_ids,
933952
output_ids,
953+
fit_intercept,
934954
grad_yyd_ids,
935955
grad_coef_ids,
936956
grad_feat_ids,
@@ -947,8 +967,13 @@ def _update_dydx(
947967
n_samples, n_y = y_hat.shape
948968
max_delay = np.max(delay_ids)
949969
n_coefs = feat_ids.shape[0]
950-
n_x = n_coefs + n_y # Total number of coefficients and intercepts
951-
y_ids = np.r_[output_ids, np.arange(n_y)]
970+
if fit_intercept:
971+
n_x = n_coefs + n_y # Total number of coefficients and intercepts
972+
y_ids = np.r_[output_ids, np.arange(n_y)]
973+
else:
974+
n_x = n_coefs
975+
y_ids = output_ids
976+
952977
x_ids = np.arange(n_x)
953978

954979
dydx = np.zeros((n_samples, n_y, n_x), dtype=float)
@@ -1011,13 +1036,18 @@ def _loss(
10111036
feat_ids,
10121037
delay_ids,
10131038
output_ids,
1039+
fit_intercept,
10141040
sample_weight_sqrt,
10151041
*args,
10161042
):
10171043
# Sum of squared errors
10181044
n_outputs = y.shape[1]
1019-
coef = coef_intercept[:-n_outputs]
1020-
intercept = coef_intercept[-n_outputs:]
1045+
if fit_intercept:
1046+
coef = coef_intercept[:-n_outputs]
1047+
intercept = coef_intercept[-n_outputs:]
1048+
else:
1049+
coef = coef_intercept
1050+
intercept = np.zeros(n_outputs, dtype=float)
10211051

10221052
y_hat = NARX._predict(
10231053
expression,
@@ -1045,6 +1075,7 @@ def _grad(
10451075
feat_ids,
10461076
delay_ids,
10471077
output_ids,
1078+
fit_intercept,
10481079
sample_weight_sqrt,
10491080
grad_yyd_ids,
10501081
grad_coef_ids,
@@ -1053,8 +1084,12 @@ def _grad(
10531084
):
10541085
# Sum of squared errors
10551086
n_outputs = y.shape[1]
1056-
coef = coef_intercept[:-n_outputs]
1057-
intercept = coef_intercept[-n_outputs:]
1087+
if fit_intercept:
1088+
coef = coef_intercept[:-n_outputs]
1089+
intercept = coef_intercept[-n_outputs:]
1090+
else:
1091+
coef = coef_intercept
1092+
intercept = np.zeros(n_outputs, dtype=float)
10581093

10591094
y_hat = NARX._predict(
10601095
expression,
@@ -1073,6 +1108,7 @@ def _grad(
10731108
feat_ids,
10741109
delay_ids,
10751110
output_ids,
1111+
fit_intercept,
10761112
grad_yyd_ids,
10771113
grad_coef_ids,
10781114
grad_feat_ids,
@@ -1266,6 +1302,7 @@ def _get_term_str(term_feat_ids, term_delay_ids):
12661302
"poly_degree": [
12671303
Interval(Integral, 1, None, closed="left"),
12681304
],
1305+
"fit_intercept": ["boolean"],
12691306
"include_zero_delay": [None, "array-like"],
12701307
"static_indices": [None, "array-like"],
12711308
"refine_verbose": ["verbose"],
@@ -1288,6 +1325,7 @@ def make_narx(
12881325
max_delay=1,
12891326
poly_degree=1,
12901327
*,
1328+
fit_intercept=True,
12911329
include_zero_delay=None,
12921330
static_indices=None,
12931331
refine_verbose=1,
@@ -1316,6 +1354,9 @@ def make_narx(
13161354
poly_degree : int, default=1
13171355
The maximum degree of polynomial features.
13181356
1357+
fit_intercept : bool, default=True
1358+
Whether to fit the intercept. If set to False, intercept will be zeros.
1359+
13191360
include_zero_delay : {None, array-like} of shape (n_features,) default=None
13201361
Whether to include the original (zero-delay) features.
13211362
@@ -1469,4 +1510,5 @@ def make_narx(
14691510
feat_ids=feat_ids,
14701511
delay_ids=delay_ids,
14711512
output_ids=output_ids,
1513+
fit_intercept=fit_intercept,
14721514
)

tests/test_narx.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,46 @@ def test_mulit_output_warn():
278278
0.0,
279279
)
280280

281+
def test_fit_intercept():
282+
X = np.random.rand(10, 2)
283+
y = np.random.rand(10, 1)
284+
time_shift_ids = np.array([[0, 1], [1, 1]])
285+
poly_ids = np.array([[1, 1], [2, 2]])
286+
feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids)
287+
288+
narx = NARX(
289+
feat_ids=feat_ids,
290+
delay_ids=delay_ids,
291+
fit_intercept=False,
292+
)
293+
narx.fit(X, y)
294+
assert_almost_equal(narx.intercept_, 0.0)
295+
narx.fit(X, y, coef_init="one_step_ahead")
296+
assert_almost_equal(narx.intercept_, 0.0)
297+
298+
X = np.random.rand(10, 2)
299+
y = np.random.rand(10, 2)
300+
time_shift_ids = np.array([[0, 1], [1, 1]])
301+
poly_ids = np.array([[1, 1], [2, 2]])
302+
feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids)
303+
304+
narx = make_narx(X, y, 1, 2, 2, fit_intercept=False)
305+
narx.fit(X, y)
306+
assert_array_equal(narx.intercept_, [0.0, 0.0])
307+
narx.fit(X, y, coef_init="one_step_ahead")
308+
assert_array_equal(narx.intercept_, [0.0, 0.0])
309+
310+
with pytest.warns(UserWarning, match="output_ids got"):
311+
narx = NARX(
312+
feat_ids=feat_ids,
313+
delay_ids=delay_ids,
314+
fit_intercept=False,
315+
)
316+
narx.fit(X, y)
317+
assert_array_equal(narx.intercept_, [0.0, 0.0])
318+
narx.fit(X, y, coef_init=[0, 0])
319+
assert_array_equal(narx.intercept_, [0.0, 0.0])
320+
281321
def test_mulit_output_error():
282322
X = np.random.rand(10, 2)
283323
y = np.random.rand(10, 2)

tests/test_narx_jac.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_simple():
2424
output_ids = np.array([0, 0], dtype=np.int32)
2525
coef = np.array([0.4, 1])
2626
intercept = np.array([1], dtype=float)
27-
sample_weight = np.array([1, 1, 1], dtype=float)
27+
sample_weight = np.array([1, 1, 1], dtype=float).reshape(-1, 1)
2828

2929

3030
y_hat = NARX._predict(
@@ -72,6 +72,7 @@ def test_simple():
7272
feat_ids,
7373
delay_ids,
7474
output_ids,
75+
True,
7576
np.sqrt(sample_weight),
7677
grad_yyd_ids,
7778
grad_coef_ids,
@@ -80,6 +81,37 @@ def test_simple():
8081
)
8182

8283
assert_almost_equal(grad.sum(axis=0), grad_truth, decimal=4)
84+
grad_0 = NARX._grad(
85+
np.r_[coef_1, 0],
86+
_predict_step,
87+
X,
88+
y,
89+
feat_ids,
90+
delay_ids,
91+
output_ids,
92+
True,
93+
np.sqrt(sample_weight),
94+
grad_yyd_ids,
95+
grad_coef_ids,
96+
grad_feat_ids,
97+
grad_delay_ids,
98+
)
99+
grad = NARX._grad(
100+
coef_1,
101+
_predict_step,
102+
X,
103+
y,
104+
feat_ids,
105+
delay_ids,
106+
output_ids,
107+
False,
108+
np.sqrt(sample_weight),
109+
grad_yyd_ids,
110+
grad_coef_ids,
111+
grad_feat_ids,
112+
grad_delay_ids,
113+
)
114+
assert_almost_equal(grad.sum(axis=0), grad_0.sum(axis=0)[:-1])
83115

84116
def test_complex():
85117
"""Complex model"""
@@ -172,6 +204,7 @@ def test_complex():
172204
feat_ids,
173205
delay_ids,
174206
output_ids,
207+
True,
175208
np.sqrt(np.ones((y.shape[0], 1))),
176209
grad_yyd_ids,
177210
grad_coef_ids,
@@ -219,6 +252,52 @@ def test_complex():
219252

220253
assert_allclose(grad.sum(axis=0)[i], grad_num.sum(), rtol=1e-1)
221254

255+
grad = NARX._grad(
256+
coef,
257+
_predict_step,
258+
X,
259+
y,
260+
feat_ids,
261+
delay_ids,
262+
output_ids,
263+
False,
264+
np.sqrt(np.ones((y.shape[0], 1))),
265+
grad_yyd_ids,
266+
grad_coef_ids,
267+
grad_feat_ids,
268+
grad_delay_ids,
269+
)
270+
y_hat_0 = NARX._predict(
271+
_predict_step,
272+
X=X,
273+
y_ref=y,
274+
coef=coef,
275+
intercept=[0, 0],
276+
feat_ids=feat_ids,
277+
delay_ids=delay_ids,
278+
output_ids=output_ids,
279+
)
280+
e_0 = y_hat_0 - y
281+
282+
for i in range(len(coef)):
283+
coef_1 = np.copy(coef)
284+
coef_1[i] += delta_w
285+
286+
y_hat_1 = NARX._predict(
287+
_predict_step,
288+
X=X,
289+
y_ref=y,
290+
coef=coef_1,
291+
intercept=[0, 0],
292+
feat_ids=feat_ids,
293+
delay_ids=delay_ids,
294+
output_ids=output_ids,
295+
)
296+
297+
e_1 = y_hat_1 - y
298+
grad_num = (e_1 - e_0).sum(axis=1) / delta_w
299+
300+
assert_allclose(grad.sum(axis=0)[i], grad_num.sum(), rtol=1e-1)
222301

223302
def test_score_nan():
224303
"""Test fitting scores when data contain nan."""

0 commit comments

Comments
 (0)