@@ -406,10 +406,10 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
     >>> print_narx(narx)
     | yid | Term | Coef |
     =======================================
-    | 0 | Intercept | 1.069 |
-    | 0 | y_hat[k-1,0] | 0.478 |
-    | 0 | X[k-2,0] | 0.716 |
-    | 0 | X[k-1,0]*X[k-3,0] | 1.504 |
+    | 0 | Intercept | 1.008 |
+    | 0 | y_hat[k-1,0] | 0.498 |
+    | 0 | X[k-2,0] | 0.701 |
+    | 0 | X[k-1,0]*X[k-3,0] | 1.496 |
     """

     _parameter_constraints: dict = {
@@ -466,7 +466,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):

         **params : dict
             Keyword arguments passed to
-            `scipy.optimize.minimize`.
+            `scipy.optimize.least_squares`.

         Returns
         -------
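Since `**params` is forwarded verbatim to `scipy.optimize.least_squares`, solver options can be tuned at fit time. A minimal usage sketch, not taken from this diff: it assumes `narx` is an already-constructed `NARX` estimator and `X`, `y` are prepared arrays; `max_nfev` and `ftol` are standard `least_squares` keyword arguments.

```python
# Illustration only: both keyword arguments are passed through to
# scipy.optimize.least_squares by fit(); narx, X, and y are assumed to exist.
narx.fit(X, y, max_nfev=200, ftol=1e-10)
```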
@@ -601,7 +601,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
             coef_init = check_array(
                 coef_init,
                 ensure_2d=False,
-                dtype=np.float64,
+                dtype=float,
             )
             if coef_init.shape[0] != n_coef_intercept:
                 raise ValueError(
@@ -634,15 +634,15 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
         return self

     @staticmethod
-    def _evaluate_term(term_id, delay_id, X, y_hat, k):
+    def _evaluate_term(feat_ids, delay_ids, X, y_hat, k):
         n_features_in = X.shape[1]
         term = 1
-        for i, feat_id in enumerate(term_id):
+        for i, feat_id in enumerate(feat_ids):
             if feat_id != -1:
                 if feat_id < n_features_in:
-                    term *= X[k - delay_id[i], feat_id]
+                    term *= X[k - delay_ids[i], feat_id]
                 else:
-                    term *= y_hat[k - delay_id[i], feat_id - n_features_in]
+                    term *= y_hat[k - delay_ids[i], feat_id - n_features_in]
         return term

     @staticmethod
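The rename from `term_id`/`delay_id` to `feat_ids`/`delay_ids` makes the convention explicit: each term is a product over (feature id, delay) pairs, where ids below `n_features_in` index `X`, larger ids index `y_hat`, and `-1` is a placeholder that is skipped. A small sketch of what the method computes, assuming `NARX` is importable from the module this diff touches:

```python
import numpy as np

from fastcan.narx import NARX  # assumed import path for the class in this diff

# feat_ids = [0, 0] with delay_ids = [1, 3] encodes the term X[k-1,0] * X[k-3,0]
# from the docstring example above; y_hat is unused by this purely exogenous term.
X = np.arange(10.0).reshape(-1, 1)
y_hat = np.zeros((10, 1))
k = 5
term = NARX._evaluate_term(np.array([0, 0]), np.array([1, 3]), X, y_hat, k)
assert term == X[k - 1, 0] * X[k - 3, 0]  # 4.0 * 2.0 == 8.0
```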
@@ -666,7 +666,7 @@ def _predict(
         at_init = True
         init_k = 0
         for k in range(n_samples):
-            if ~np.all(np.isfinite(X[k])):
+            if not np.all(np.isfinite(X[k])):
                 at_init = True
                 init_k = k + 1
                 y_hat[k] = np.nan
@@ -687,85 +687,101 @@ def _predict(

     @staticmethod
     def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in):
-        n_y = np.max(output_ids) + 1  # number of output
-        n_d = np.max(delay_ids)  # max delay
-
-        n_c = feat_ids.shape[0]  # number of coef
-        # number of dy/dx, [dy0(k)/dx, dy1(k)/dx, dy0(k-1)/dx, dy1(k-1)/dx, ...]
-        n_dydx = n_y * n_d
-        c_ids = np.arange(n_c)  # Coef index
-
-        # Coef ids, feature ids, delay ids
-        # cfd_ids is n_y * n_dydx
-        cfd_ids = [[[] for _ in range(n_dydx)] for _ in range(n_y)]
-        for i in range(n_y):
-            for j in range(n_dydx):
-                # Get dy[y_j](k - d_j)/dx
-                d_j = j // n_y + 1  # delay
-                y_j = j % n_y + n_features_in  # output index
-                output_mask = output_ids == i
-                terms = feat_ids[output_mask]
-                delays = delay_ids[output_mask]
-                c_id = c_ids[output_mask]
-                for t, (term, delay) in enumerate(zip(terms, delays)):
-                    if np.any((y_j == term) & (d_j == delay)):
-                        a_ij = []
-                        for f, (feat, k) in enumerate(zip(term, delay)):
-                            if (feat == y_j) and (k == d_j):
-                                a_ij += [
-                                    [c_id[t], np.delete(term, f), np.delete(delay, f)]
-                                ]
-                        cfd_ids[i][j] += a_ij
+        """
+        Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx.
+        Maps coefficients to their corresponding features and delays.
+        """
+        n_outputs = np.max(output_ids) + 1
+        max_delay = np.max(delay_ids)
+
+        # Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay
+        # axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
+        # axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
+        # axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
+        cfd_ids = [
+            [[[] for _ in range(max_delay)] for _ in range(n_outputs)]
+            for _ in range(n_outputs)
+        ]
+
+        for coef_id, (term_feat_ids, term_delay_ids) in enumerate(
+            zip(feat_ids, delay_ids)
+        ):
+            row_y_id = output_ids[coef_id]
+            for var_id, (feat_id, delay_id) in enumerate(
+                zip(term_feat_ids, term_delay_ids)
+            ):
+                if feat_id >= n_features_in and delay_id > 0:
+                    col_y_id = feat_id - n_features_in
+                    cfd_ids[row_y_id][col_y_id][delay_id - 1].append(
+                        [
+                            coef_id,
+                            np.delete(term_feat_ids, var_id),
+                            np.delete(term_delay_ids, var_id),
+                        ]
+                    )
+
         return cfd_ids

     @staticmethod
     def _update_cfd(X, y_hat, coef, cfd_ids, k):
-        n_y = y_hat.shape[1]
-        n_dydx = len(cfd_ids[0])
-        cfd = np.zeros((n_y, n_dydx))
-        for i in range(n_y):
-            for j in range(n_dydx):
-                if cfd_ids[i][j]:
-                    a_ij = 0
-                    for coef_id, term_id, delay_id in cfd_ids[i][j]:
-                        a_ij += coef[coef_id] * NARX._evaluate_term(
-                            term_id, delay_id, X, y_hat, k
+        """
+        Updates CFD matrix based on the current state.
+        """
+        n_outputs, max_delay = y_hat.shape[1], len(cfd_ids[0][0])
+        cfd = np.zeros((n_outputs, n_outputs, max_delay))
+
+        for i, yi_ids in enumerate(cfd_ids):
+            for j, yiyj in enumerate(yi_ids):
+                for d, yiyjd in enumerate(yiyj):
+                    if yiyjd:
+                        cfd[i, j, d] = sum(
+                            coef[coef_id]
+                            * NARX._evaluate_term(feat_id, delay_id, X, y_hat, k)
+                            for coef_id, feat_id, delay_id in yiyjd
                         )
-                    cfd[i, j] = a_ij
         return cfd

     @staticmethod
     def _update_dydx(X, y_hat, coef, feat_ids, delay_ids, output_ids, cfd_ids):
-        n_samples = X.shape[0]
-        n_y = y_hat.shape[1]
+        """
+        Computation of the Jacobian matrix dydx.
+
+        Returns
+        -------
+        dydx : ndarray of shape (n_samples, n_outputs, n_x)
+            Jacobian matrix of the outputs with respect to coefficients and intercepts.
+        """
+        n_samples, n_y = y_hat.shape
         max_delay = np.max(delay_ids)
-        n_c = feat_ids.shape[0]
-        n_x = n_c + n_y
-        output_x_ids = np.r_[output_ids, np.arange(n_y)]
-        if max_delay == 0:
-            dydx = np.zeros((n_samples, n_x, n_y))
-        else:
-            dydx = np.zeros((n_samples, n_x, n_y * max_delay))
-        for k in range(max_delay, n_samples):
-            for i in range(n_x):
-                if i < n_c:
-                    term = NARX._evaluate_term(feat_ids[i], delay_ids[i], X, y_hat, k)
-                else:
-                    term = 1
+        n_coefs = feat_ids.shape[0]
+        n_x = n_coefs + n_y  # Total number of coefficients and intercepts
+        y_ids = np.r_[output_ids, np.arange(n_y)]
+        x_ids = np.arange(n_x)

-                if ~np.isfinite(term):
-                    continue
-                dydx[k, i, output_x_ids[i]] = term
-                if max_delay != 0:
-                    cfd = NARX._update_cfd(X, y_hat, coef, cfd_ids, k)
-                    if ~np.all(np.isfinite(cfd)):
-                        continue
-                    dydx[k, i, :n_y] += cfd @ dydx[k - 1, i]
-                    dydx[k, i, n_y:] = dydx[k - 1, i, :-n_y]
+        dydx = np.zeros((n_samples, n_y, n_x), dtype=float)
+        for k in range(max_delay, n_samples):
+            # Compute terms for time step k
+            terms = np.ones(n_x, dtype=float)
+            for j in range(n_coefs):
+                terms[j] = NARX._evaluate_term(feat_ids[j], delay_ids[j], X, y_hat, k)
+            if not np.all(np.isfinite(terms)):
+                break
+
+            # Update constant terms of Jacobian
+            dydx[k, y_ids, x_ids] = terms
+
+            # Update dynamic terms of Jacobian
+            if max_delay > 0:
+                cfd = NARX._update_cfd(X, y_hat, coef, cfd_ids, k)
+                for d in range(max_delay):
+                    dydx[k] += cfd[:, :, d] @ dydx[k - d - 1]
+
+            # Handle divergence
             if np.any(dydx[k] > 1e20):
                 dydx[k:] = 1e20
-                return dydx[:, :, :n_y]
-        return dydx[:, :, :n_y]
+                break
+
+        return dydx

     @staticmethod
     def _loss(
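The reworked Jacobian code stores `dydx` as `(n_samples, n_outputs, n_x)` and propagates it with `dydx[k] += cfd[:, :, d] @ dydx[k - d - 1]`, i.e. the chain rule applied through every delayed output that appears in a term. A toy sketch of that recursion, illustration only and not the library code, for a single-output model `y[k] = a*y[k-1] + b*x[k-1]`, checked against a finite difference:

```python
import numpy as np

# dy(k)/dtheta = d(term)/dtheta + sum_d [dy(k)/dy(k-d)] * dy(k-d)/dtheta
rng = np.random.default_rng(0)
x = rng.normal(size=20)
a, b = 0.5, 0.7

y = np.zeros(20)
dyda = np.zeros(20)  # dy[k]/da
dydb = np.zeros(20)  # dy[k]/db
for k in range(1, 20):
    y[k] = a * y[k - 1] + b * x[k - 1]
    # "constant" part (the term itself) + dynamic part propagated through y[k-1]
    dyda[k] = y[k - 1] + a * dyda[k - 1]
    dydb[k] = x[k - 1] + a * dydb[k - 1]

# Finite-difference check of dy/da at the last step
eps = 1e-6
y_eps = np.zeros(20)
for k in range(1, 20):
    y_eps[k] = (a + eps) * y_eps[k - 1] + b * x[k - 1]
assert np.isclose((y_eps[-1] - y[-1]) / eps, dyda[-1], rtol=1e-3, atol=1e-6)
```

Here `a * dyda[k - 1]` plays the role of `cfd[:, :, 0] @ dydx[k - 1]`: the coefficient of the delayed-output term times the term with that output factor removed.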
@@ -821,15 +837,11 @@ def _grad(
         mask_nomissing = _mask_missing_value(
             y, y_hat, sample_weight_sqrt, return_mask=True
         )
-        y_masked = y[mask_nomissing]
-        y_hat_masked = y_hat[mask_nomissing]
+
         sample_weight_sqrt_masked = sample_weight_sqrt[mask_nomissing]
         dydx_masked = dydx[mask_nomissing]

-        e = y_hat_masked - y_masked
-        return (e[:, np.newaxis, :] * dydx_masked).sum(
-            axis=2
-        ) * sample_weight_sqrt_masked
+        return dydx_masked.sum(axis=1) * sample_weight_sqrt_masked

     @validate_params(
         {