@@ -406,10 +406,10 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
406
406
>>> print_narx(narx)
407
407
| yid | Term | Coef |
408
408
=======================================
409
- | 0 | Intercept | 1.069 |
410
- | 0 | y_hat[k-1,0] | 0.478 |
411
- | 0 | X[k-2,0] | 0.716 |
412
- | 0 | X[k-1,0]*X[k-3,0] | 1.504 |
409
+ | 0 | Intercept | 1.008 |
410
+ | 0 | y_hat[k-1,0] | 0.498 |
411
+ | 0 | X[k-2,0] | 0.701 |
412
+ | 0 | X[k-1,0]*X[k-3,0] | 1.496 |
413
413
"""
414
414
415
415
_parameter_constraints : dict = {
@@ -466,7 +466,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
466
466
467
467
**params : dict
468
468
Keyword arguments passed to
469
- `scipy.optimize.minimize `.
469
+ `scipy.optimize.least_squares `.
470
470
471
471
Returns
472
472
-------
@@ -601,7 +601,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
601
601
coef_init = check_array (
602
602
coef_init ,
603
603
ensure_2d = False ,
604
- dtype = np . float64 ,
604
+ dtype = float ,
605
605
)
606
606
if coef_init .shape [0 ] != n_coef_intercept :
607
607
raise ValueError (
@@ -634,15 +634,15 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
634
634
return self
635
635
636
636
@staticmethod
637
- def _evaluate_term (term_id , delay_id , X , y_hat , k ):
637
+ def _evaluate_term (feat_ids , delay_ids , X , y_hat , k ):
638
638
n_features_in = X .shape [1 ]
639
639
term = 1
640
- for i , feat_id in enumerate (term_id ):
640
+ for i , feat_id in enumerate (feat_ids ):
641
641
if feat_id != - 1 :
642
642
if feat_id < n_features_in :
643
- term *= X [k - delay_id [i ], feat_id ]
643
+ term *= X [k - delay_ids [i ], feat_id ]
644
644
else :
645
- term *= y_hat [k - delay_id [i ], feat_id - n_features_in ]
645
+ term *= y_hat [k - delay_ids [i ], feat_id - n_features_in ]
646
646
return term
647
647
648
648
@staticmethod
@@ -666,7 +666,7 @@ def _predict(
666
666
at_init = True
667
667
init_k = 0
668
668
for k in range (n_samples ):
669
- if ~ np .all (np .isfinite (X [k ])):
669
+ if not np .all (np .isfinite (X [k ])):
670
670
at_init = True
671
671
init_k = k + 1
672
672
y_hat [k ] = np .nan
@@ -687,85 +687,101 @@ def _predict(
687
687
688
688
@staticmethod
689
689
def _get_cfd_ids (feat_ids , delay_ids , output_ids , n_features_in ):
690
- n_y = np .max (output_ids ) + 1 # number of output
691
- n_d = np .max (delay_ids ) # max delay
692
-
693
- n_c = feat_ids .shape [0 ] # number of coef
694
- # number of dy/dx, [dy0(k)/dx, dy1(k)/dx, dy0(k-1)/dx, dy1(k-1)/dx, ...]
695
- n_dydx = n_y * n_d
696
- c_ids = np .arange (n_c ) # Coef index
697
-
698
- # Coef ids, feature ids, delay ids
699
- # cfd_ids is n_y * n_dydx
700
- cfd_ids = [[[] for _ in range (n_dydx )] for _ in range (n_y )]
701
- for i in range (n_y ):
702
- for j in range (n_dydx ):
703
- # Get dy[y_j](k - d_j)/dx
704
- d_j = j // n_y + 1 # delay
705
- y_j = j % n_y + n_features_in # output index
706
- output_mask = output_ids == i
707
- terms = feat_ids [output_mask ]
708
- delays = delay_ids [output_mask ]
709
- c_id = c_ids [output_mask ]
710
- for t , (term , delay ) in enumerate (zip (terms , delays )):
711
- if np .any ((y_j == term ) & (d_j == delay )):
712
- a_ij = []
713
- for f , (feat , k ) in enumerate (zip (term , delay )):
714
- if (feat == y_j ) and (k == d_j ):
715
- a_ij += [
716
- [c_id [t ], np .delete (term , f ), np .delete (delay , f )]
717
- ]
718
- cfd_ids [i ][j ] += a_ij
690
+ """
691
+ Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx.
692
+ Maps coefficients to their corresponding features and delays.
693
+ """
694
+ n_outputs = np .max (output_ids ) + 1
695
+ max_delay = np .max (delay_ids )
696
+
697
+ # Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay
698
+ # axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
699
+ # axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
700
+ # axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
701
+ cfd_ids = [
702
+ [[[] for _ in range (max_delay )] for _ in range (n_outputs )]
703
+ for _ in range (n_outputs )
704
+ ]
705
+
706
+ for coef_id , (term_feat_ids , term_delay_ids ) in enumerate (
707
+ zip (feat_ids , delay_ids )
708
+ ):
709
+ row_y_id = output_ids [coef_id ]
710
+ for var_id , (feat_id , delay_id ) in enumerate (
711
+ zip (term_feat_ids , term_delay_ids )
712
+ ):
713
+ if feat_id >= n_features_in and delay_id > 0 :
714
+ col_y_id = feat_id - n_features_in
715
+ cfd_ids [row_y_id ][col_y_id ][delay_id - 1 ].append (
716
+ [
717
+ coef_id ,
718
+ np .delete (term_feat_ids , var_id ),
719
+ np .delete (term_delay_ids , var_id ),
720
+ ]
721
+ )
722
+
719
723
return cfd_ids
720
724
721
725
@staticmethod
722
726
def _update_cfd (X , y_hat , coef , cfd_ids , k ):
723
- n_y = y_hat .shape [1 ]
724
- n_dydx = len (cfd_ids [0 ])
725
- cfd = np .zeros ((n_y , n_dydx ))
726
- for i in range (n_y ):
727
- for j in range (n_dydx ):
728
- if cfd_ids [i ][j ]:
729
- a_ij = 0
730
- for coef_id , term_id , delay_id in cfd_ids [i ][j ]:
731
- a_ij += coef [coef_id ] * NARX ._evaluate_term (
732
- term_id , delay_id , X , y_hat , k
727
+ """
728
+ Updates CFD matrix based on the current state.
729
+ """
730
+ n_outputs , max_delay = y_hat .shape [1 ], len (cfd_ids [0 ][0 ])
731
+ cfd = np .zeros ((n_outputs , n_outputs , max_delay ))
732
+
733
+ for i , yi_ids in enumerate (cfd_ids ):
734
+ for j , yiyj in enumerate (yi_ids ):
735
+ for d , yiyjd in enumerate (yiyj ):
736
+ if yiyjd :
737
+ cfd [i , j , d ] = sum (
738
+ coef [coef_id ]
739
+ * NARX ._evaluate_term (feat_id , delay_id , X , y_hat , k )
740
+ for coef_id , feat_id , delay_id in yiyjd
733
741
)
734
- cfd [i , j ] = a_ij
735
742
return cfd
736
743
737
744
@staticmethod
738
745
def _update_dydx (X , y_hat , coef , feat_ids , delay_ids , output_ids , cfd_ids ):
739
- n_samples = X .shape [0 ]
740
- n_y = y_hat .shape [1 ]
746
+ """
747
+ Computation of the Jacobian matrix dydx.
748
+
749
+ Returns
750
+ -------
751
+ dydx : ndarray of shape (n_samples, n_outputs, n_x)
752
+ Jacobian matrix of the outputs with respect to coefficients and intercepts.
753
+ """
754
+ n_samples , n_y = y_hat .shape
741
755
max_delay = np .max (delay_ids )
742
- n_c = feat_ids .shape [0 ]
743
- n_x = n_c + n_y
744
- output_x_ids = np .r_ [output_ids , np .arange (n_y )]
745
- if max_delay == 0 :
746
- dydx = np .zeros ((n_samples , n_x , n_y ))
747
- else :
748
- dydx = np .zeros ((n_samples , n_x , n_y * max_delay ))
749
- for k in range (max_delay , n_samples ):
750
- for i in range (n_x ):
751
- if i < n_c :
752
- term = NARX ._evaluate_term (feat_ids [i ], delay_ids [i ], X , y_hat , k )
753
- else :
754
- term = 1
756
+ n_coefs = feat_ids .shape [0 ]
757
+ n_x = n_coefs + n_y # Total number of coefficients and intercepts
758
+ y_ids = np .r_ [output_ids , np .arange (n_y )]
759
+ x_ids = np .arange (n_x )
755
760
756
- if ~ np .isfinite (term ):
757
- continue
758
- dydx [k , i , output_x_ids [i ]] = term
759
- if max_delay != 0 :
760
- cfd = NARX ._update_cfd (X , y_hat , coef , cfd_ids , k )
761
- if ~ np .all (np .isfinite (cfd )):
762
- continue
763
- dydx [k , i , :n_y ] += cfd @ dydx [k - 1 , i ]
764
- dydx [k , i , n_y :] = dydx [k - 1 , i , :- n_y ]
761
+ dydx = np .zeros ((n_samples , n_y , n_x ), dtype = float )
762
+ for k in range (max_delay , n_samples ):
763
+ # Compute terms for time step k
764
+ terms = np .ones (n_x , dtype = float )
765
+ for j in range (n_coefs ):
766
+ terms [j ] = NARX ._evaluate_term (feat_ids [j ], delay_ids [j ], X , y_hat , k )
767
+ if not np .all (np .isfinite (terms )):
768
+ break
769
+
770
+ # Update constant terms of Jacobian
771
+ dydx [k , y_ids , x_ids ] = terms
772
+
773
+ # Update dynamic terms of Jacobian
774
+ if max_delay > 0 :
775
+ cfd = NARX ._update_cfd (X , y_hat , coef , cfd_ids , k )
776
+ for d in range (max_delay ):
777
+ dydx [k ] += cfd [:, :, d ] @ dydx [k - d - 1 ]
778
+
779
+ # Handle divergence
765
780
if np .any (dydx [k ] > 1e20 ):
766
781
dydx [k :] = 1e20
767
- return dydx [:, :, :n_y ]
768
- return dydx [:, :, :n_y ]
782
+ break
783
+
784
+ return dydx
769
785
770
786
@staticmethod
771
787
def _loss (
@@ -821,15 +837,11 @@ def _grad(
821
837
mask_nomissing = _mask_missing_value (
822
838
y , y_hat , sample_weight_sqrt , return_mask = True
823
839
)
824
- y_masked = y [mask_nomissing ]
825
- y_hat_masked = y_hat [mask_nomissing ]
840
+
826
841
sample_weight_sqrt_masked = sample_weight_sqrt [mask_nomissing ]
827
842
dydx_masked = dydx [mask_nomissing ]
828
843
829
- e = y_hat_masked - y_masked
830
- return (e [:, np .newaxis , :] * dydx_masked ).sum (
831
- axis = 2
832
- ) * sample_weight_sqrt_masked
844
+ return dydx_masked .sum (axis = 1 ) * sample_weight_sqrt_masked
833
845
834
846
@validate_params (
835
847
{
0 commit comments