Skip to content

Commit 183d8cc

Browse files
committed
FEAT add fd2tp and tp2fd
1 parent ee26c61 commit 183d8cc

File tree

3 files changed

+312
-70
lines changed

3 files changed

+312
-70
lines changed

examples/plot_narx.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@
125125
# In the printed NARX model, it is found that :class:`FastCan` selects the correct
126126
# terms and the coefficients are close to the true values.
127127

128-
from fastcan.narx import NARX, _pt2fd, print_narx
128+
from fastcan.narx import NARX, print_narx, tp2fd
129129

130130
# Convert poly_ids and time_shift_ids to feat_ids and delay_ids
131-
feat_ids, delay_ids = _pt2fd(selected_poly_ids, time_shift_ids)
131+
feat_ids, delay_ids = tp2fd(time_shift_ids, selected_poly_ids)
132132

133133
narx_model = NARX(
134134
feat_ids=feat_ids,

fastcan/narx.py

Lines changed: 255 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -280,31 +280,250 @@ def _mask_missing_value(*arr, return_mask=False):
280280
return tuple([x[mask_nomissing] for x in arr])
281281

282282

283-
def _fd2pt(feat_ids, delay_ids):
283+
def _valiate_time_shift_poly_ids(
284+
time_shift_ids, poly_ids, n_samples=None, n_features=None, n_outputs=None
285+
):
286+
if n_samples is None:
287+
n_samples = np.inf
288+
if n_features is None:
289+
n_features = np.inf
290+
if n_outputs is None:
291+
n_outputs = np.inf
292+
293+
# Validate time_shift_ids
294+
time_shift_ids_ = check_array(
295+
time_shift_ids,
296+
ensure_2d=True,
297+
dtype=int,
298+
)
299+
if time_shift_ids_.shape[1] != 2:
300+
raise ValueError(
301+
"time_shift_ids should have shape (n_variables, 2), "
302+
f"but got {time_shift_ids_.shape}."
303+
)
304+
if (time_shift_ids_[:, 0].min() < 0) or (
305+
time_shift_ids_[:, 0].max() >= n_features + n_outputs
306+
):
307+
raise ValueError(
308+
"The element x of the first column of time_shift_ids should "
309+
f"satisfy 0 <= x < {n_features + n_outputs}."
310+
)
311+
if (time_shift_ids_[:, 1].min() < 0) or (time_shift_ids_[:, 1].max() >= n_samples):
312+
raise ValueError(
313+
"The element x of the second column of time_shift_ids should "
314+
f"satisfy 0 <= x < {n_samples}."
315+
)
316+
# Validate poly_ids
317+
poly_ids_ = check_array(
318+
poly_ids,
319+
ensure_2d=True,
320+
dtype=int,
321+
)
322+
if (poly_ids_.min() < 0) or (poly_ids_.max() > time_shift_ids_.shape[0]):
323+
raise ValueError(
324+
"The element x of poly_ids should "
325+
f"satisfy 0 <= x <= {time_shift_ids_.shape[0]}."
326+
)
327+
return time_shift_ids_, poly_ids_
328+
329+
330+
def _validate_feat_delay_ids(
331+
feat_ids, delay_ids, n_samples=None, n_features=None, n_outputs=None
332+
):
333+
"""Validate feat_ids and delay_ids."""
334+
if n_samples is None:
335+
n_samples = np.inf
336+
if n_features is None:
337+
n_features = np.inf
338+
if n_outputs is None:
339+
n_outputs = np.inf
340+
341+
# Validate feat_ids
342+
feat_ids_ = check_array(
343+
feat_ids,
344+
ensure_2d=True,
345+
dtype=int,
346+
)
347+
if (feat_ids_.min() < -1) or (feat_ids_.max() > n_features + n_outputs - 1):
348+
raise ValueError(
349+
"The element x of feat_ids should "
350+
f"satisfy -1 <= x <= {n_features + n_outputs - 1}."
351+
)
352+
# Validate delay_ids
353+
delay_ids_ = check_array(
354+
delay_ids,
355+
ensure_2d=True,
356+
dtype=int,
357+
)
358+
if delay_ids_.shape != feat_ids_.shape:
359+
raise ValueError(
360+
"The shape of delay_ids should be equal to "
361+
f"the shape of feat_ids {feat_ids_.shape}, "
362+
f"but got {delay_ids_.shape}."
363+
)
364+
if ((delay_ids_ == -1) != (feat_ids_ == -1)).any():
365+
raise ValueError(
366+
"The element x of delay_ids should be -1 "
367+
"if and only if the element x of feat_ids is -1."
368+
)
369+
if (delay_ids_.min() < -1) or (delay_ids_.max() >= n_samples):
370+
raise ValueError(
371+
"The element x of delay_ids should " f"satisfy -1 <= x < {n_samples}."
372+
)
373+
return feat_ids_, delay_ids_
374+
375+
376+
@validate_params(
377+
{
378+
"feat_ids": ["array-like"],
379+
"delay_ids": ["array-like"],
380+
},
381+
prefer_skip_nested_validation=True,
382+
)
383+
def fd2tp(feat_ids, delay_ids):
284384
"""
285-
Convert feat_ids and delay_ids to poly_ids and time_shift_ids
385+
Convert feat_ids and delay_ids to time_shift_ids and poly_ids.
386+
The polynomial terms, e.g., x0(k-1)^2, x0(k-2)x1(k-3), can be
387+
represented by two ways:
388+
389+
#. feat_ids and delay_ids, e.g., [[0, 0], [0, 1]] and [[1, 1], [2, 3]]
390+
391+
#. time_shift_ids and poly_ids, e.g., [[0, 1], [0, 2], [1, 3]] and [[1, 1], [2, 3]]
392+
393+
For feat_ids, [0, 0] and [0, 1] represent x0*x0 and x0*x1, while
394+
for delay_ids, [1, 1] and [2, 3] represent the delays of features in feat_ids.
395+
396+
For time_shift_ids, [0, 1], [0, 2], and [1, 3] represents x0(k-1), x0(k-2),
397+
and x1(k-3), respectively. For poly_ids, [1, 1] and [2, 3] represent the first
398+
variable multiplying the first variable given by time_shift_ids, i.e.,
399+
x0(k-1)*x0(k-1), and the second variable multiplying the thrid variable, i.e.,
400+
x0(k-1)*x1(k-3).
401+
402+
Parameters
403+
----------
404+
feat_ids : array-like of shape (n_terms, degree), default=None
405+
The unique id numbers of features to form polynomial terms.
406+
The id -1 stands for the constant 1.
407+
The id 0 to n are the index of features.
408+
409+
delay_ids : array-like of shape (n_terms, degree), default=None
410+
The delays of each feature in polynomial terms.
411+
The id -1 stands for empty.
412+
The id 0 stands for 0 delay.
413+
The positive integer id k stands for k-th delay.
414+
415+
Returns
416+
-------
417+
time_shift_ids : array-like of shape (n_variables, 2), default=None
418+
The unique id numbers of time shift variables, which are
419+
(feature_idx, delay).
420+
421+
poly_ids : array-like of shape (n_polys, degree), default=None
422+
The unique id numbers of polynomial terms, excluding the intercept.
423+
The id 0 stands for the constant 1.
424+
The id 1 to n are the index+1 of time_shift_ids.
425+
426+
Examples
427+
--------
428+
>>> from fastcan.narx import fd2tp
429+
>>> # Encode x0(k-1), x0(k-2)x1(k-3)
430+
>>> feat_ids = [[-1, 0], [0, 1]]
431+
>>> delay_ids = [[-1, 1], [2, 3]]
432+
>>> time_shift_ids, poly_ids = fd2tp(feat_ids, delay_ids)
433+
>>> print(time_shift_ids)
434+
[[0 1]
435+
[0 2]
436+
[1 3]]
437+
>>> print(poly_ids)
438+
[[0 1]
439+
[2 3]]
286440
"""
287-
featd = np.c_[feat_ids.flatten(), delay_ids.flatten()]
441+
_feat_ids, _delay_ids = _validate_feat_delay_ids(feat_ids, delay_ids)
442+
featd = np.c_[_feat_ids.flatten(), _delay_ids.flatten()]
288443
# Ensure featd has at least one [-1, -1]
289444
time_shift_ids = np.unique(np.r_[[[-1, -1]], featd], axis=0)
290445
poly_ids = np.array(
291446
[np.where((time_shift_ids == row).all(axis=1))[0][0] for row in featd]
292-
).reshape(feat_ids.shape)
447+
).reshape(_feat_ids.shape)
293448
time_shift_ids = time_shift_ids[time_shift_ids[:, 0] != -1]
294-
return poly_ids, time_shift_ids
449+
return time_shift_ids, poly_ids
295450

296451

297-
def _pt2fd(poly_ids, time_shift_ids):
452+
@validate_params(
453+
{
454+
"time_shift_ids": ["array-like"],
455+
"poly_ids": ["array-like"],
456+
},
457+
prefer_skip_nested_validation=True,
458+
)
459+
def tp2fd(time_shift_ids, poly_ids):
298460
"""
299-
Convert poly_ids and time_shift_ids to feat_ids and delay_ids
461+
Convert time_shift_ids and poly_ids to feat_ids and delay_ids.
462+
The polynomial terms, e.g., x0(k-1)^2, x0(k-2)x1(k-3), can be
463+
represented by two ways:
464+
465+
#. feat_ids and delay_ids, e.g., [[0, 0], [0, 1]] and [[1, 1], [2, 3]]
466+
467+
#. time_shift_ids and poly_ids, e.g., [[0, 1], [0, 2], [1, 3]] and [[1, 1], [2, 3]]
468+
469+
For feat_ids, [0, 0] and [0, 1] represent x0*x0 and x0*x1, while
470+
for delay_ids, [1, 1] and [2, 3] represent the delays of features in feat_ids.
471+
472+
For time_shift_ids, [0, 1], [0, 2], and [1, 3] represents x0(k-1), x0(k-2),
473+
and x1(k-3), respectively. For poly_ids, [1, 1] and [2, 3] represent the first
474+
variable multiplying the first variable given by time_shift_ids, i.e.,
475+
x0(k-1)*x0(k-1), and the second variable multiplying the thrid variable, i.e.,
476+
x0(k-1)*x1(k-3).
477+
478+
Parameters
479+
----------
480+
time_shift_ids : array-like of shape (n_variables, 2)
481+
The unique id numbers of time shift variables, which are
482+
(feature_idx, delay).
483+
484+
poly_ids : array-like of shape (n_polys, degree)
485+
The unique id numbers of polynomial terms, excluding the intercept.
486+
The id 0 stands for the constant 1.
487+
The id 1 to n are the index+1 of time_shift_ids.
488+
489+
Returns
490+
-------
491+
feat_ids : array-like of shape (n_terms, degree), default=None
492+
The unique id numbers of features to form polynomial terms.
493+
The id -1 stands for the constant 1.
494+
The id 0 to n are the index of features.
495+
496+
delay_ids : array-like of shape (n_terms, degree), default=None
497+
The delays of each feature in polynomial terms.
498+
The id -1 stands for empty.
499+
The id 0 stands for 0 delay.
500+
The positive integer id k stands for k-th delay.
501+
502+
Examples
503+
--------
504+
>>> from fastcan.narx import tp2fd
505+
>>> # Encode x0(k-1), x0(k-2)x1(k-3)
506+
>>> time_shift_ids = [[0, 1], [0, 2], [1, 3]]
507+
>>> poly_ids = [[0, 1], [2, 3]]
508+
>>> feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids)
509+
>>> print(feat_ids)
510+
[[-1 0]
511+
[ 0 1]]
512+
>>> print(delay_ids)
513+
[[-1 1]
514+
[ 2 3]]
300515
"""
301-
feat_ids = np.full_like(poly_ids, -1, dtype=int)
302-
delay_ids = np.full_like(poly_ids, -1, dtype=int)
303-
for i, poly_id in enumerate(poly_ids):
516+
_time_shift_ids, _poly_ids = _valiate_time_shift_poly_ids(
517+
time_shift_ids,
518+
poly_ids,
519+
)
520+
feat_ids = np.full_like(_poly_ids, -1, dtype=int)
521+
delay_ids = np.full_like(_poly_ids, -1, dtype=int)
522+
for i, poly_id in enumerate(_poly_ids):
304523
for j, variable_id in enumerate(poly_id):
305524
if variable_id != 0:
306-
feat_ids[i, j] = time_shift_ids[variable_id - 1, 0]
307-
delay_ids[i, j] = time_shift_ids[variable_id - 1, 1]
525+
feat_ids[i, j] = _time_shift_ids[variable_id - 1, 0]
526+
delay_ids[i, j] = _time_shift_ids[variable_id - 1, 1]
308527
feat_ids = feat_ids
309528
delay_ids = delay_ids
310529
return feat_ids, delay_ids
@@ -313,12 +532,12 @@ def _pt2fd(poly_ids, time_shift_ids):
313532
class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
314533
"""The Nonlinear Autoregressive eXogenous (NARX) model class.
315534
For example, a (polynomial) NARX model is like
316-
y(t) = y(t-1)*u(t-1) + u(t-1)^2 + u(t-2) + 1.5
317-
where y(t) is the system output at time t,
318-
u(t) is the system input at time t,
535+
y(k) = y(k-1)*u(k-1) + u(k-1)^2 + u(k-2) + 1.5
536+
where y(k) is the system output at the k-th time step,
537+
u(k) is the system input at the k-th time step,
319538
u and y is called features,
320-
u(t-1) is called a (time shift) variable,
321-
u(t-1)^2 is called a (polynomial) term, and
539+
u(k-1) is called a (time shift) variable,
540+
u(k-1)^2 is called a (polynomial) term, and
322541
1.5 is called an intercept.
323542
324543
Parameters
@@ -338,7 +557,7 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
338557
The id -1 stands for empty.
339558
The id 0 stands for 0 delay.
340559
The positive integer id k stands for k-th delay.
341-
E.g. for the polynomial terms 1*u(k-1, 0), 1*u(k, 1),
560+
E.g., for the polynomial terms 1*u(k-1, 0), 1*u(k, 1),
342561
and u(k-1, 0)*y(k-2, 0), the delay_ids [[-1, 1], [-1, 0], [1, 2]].
343562
344563
output_ids : array-like of shape (n_polys,), default=None
@@ -489,48 +708,26 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
489708
self.n_outputs_ = y.shape[1]
490709
n_samples, n_features = X.shape
491710

492-
# Validate feat_ids
493711
if self.feat_ids is None:
494-
self.feat_ids_ = make_poly_ids(n_features, 1) - 1
712+
feat_ids_ = make_poly_ids(n_features, 1) - 1
495713
else:
496-
self.feat_ids_ = check_array(
497-
self.feat_ids,
498-
ensure_2d=True,
499-
dtype=int,
500-
)
501-
if (self.feat_ids_.min() < -1) or (
502-
self.feat_ids_.max() > n_features + self.n_outputs_ - 1
503-
):
504-
raise ValueError(
505-
"The element x of feat_ids should "
506-
f"satisfy -1 <= x <= {n_features + self.n_outputs_ - 1}."
507-
)
714+
feat_ids_ = self.feat_ids
715+
508716
if self.delay_ids is None:
509-
self.delay_ids_ = np.copy(self.feat_ids_)
510-
self.delay_ids_[(self.feat_ids_ > -1) & (self.feat_ids_ < n_features)] = 0
511-
self.delay_ids_[(self.feat_ids_ >= n_features)] = 1
717+
delay_ids_ = np.copy(feat_ids_)
718+
delay_ids_[(feat_ids_ > -1) & (feat_ids_ < n_features)] = 0
719+
delay_ids_[(feat_ids_ >= n_features)] = 1
512720
else:
513-
self.delay_ids_ = check_array(
514-
self.delay_ids,
515-
ensure_2d=True,
516-
dtype=int,
517-
)
518-
if self.delay_ids_.shape != self.feat_ids_.shape:
519-
raise ValueError(
520-
"The shape of delay_ids should be equal to "
521-
f"the shape of feat_ids {self.feat_ids_.shape}, "
522-
f"but got {self.delay_ids_.shape}."
523-
)
524-
if ((self.delay_ids_ == -1) != (self.feat_ids_ == -1)).any():
525-
raise ValueError(
526-
"The element x of delay_ids should be -1 "
527-
"if and only if the element x of feat_ids is -1."
528-
)
529-
if (self.delay_ids_.min() < -1) or (self.delay_ids_.max() >= n_samples):
530-
raise ValueError(
531-
"The element x of delay_ids should "
532-
f"satisfy -1 <= x < {n_samples}."
533-
)
721+
delay_ids_ = self.delay_ids
722+
723+
# Validate feat_ids and delay_ids
724+
self.feat_ids_, self.delay_ids_ = _validate_feat_delay_ids(
725+
feat_ids_,
726+
delay_ids_,
727+
n_samples=n_samples,
728+
n_features=n_features,
729+
n_outputs=self.n_outputs_,
730+
)
534731

535732
n_terms = self.feat_ids_.shape[0]
536733
# Validate output_ids
@@ -569,7 +766,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
569766

570767
if isinstance(coef_init, (type(None), str)):
571768
# fit a one-step-ahead NARX model
572-
poly_ids, time_shift_ids = _fd2pt(self.feat_ids_, self.delay_ids_)
769+
time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_)
573770
xy_hstack = np.c_[X, y]
574771
osa_narx = LinearRegression()
575772
time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids)
@@ -1221,7 +1418,7 @@ def make_narx(
12211418
)
12221419

12231420
output_ids = [i for i in range(n_outputs) for _ in range(n_terms_to_select[i])]
1224-
feat_ids, delay_ids = _pt2fd(poly_ids, time_shift_ids)
1421+
feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids)
12251422

12261423
return NARX(
12271424
feat_ids=feat_ids,

0 commit comments

Comments
 (0)