1313from sklearn .base import BaseEstimator , MultiOutputMixin , RegressorMixin
1414from sklearn .linear_model import LinearRegression
1515from sklearn .utils import check_array , check_consistent_length , column_or_1d
16+ from sklearn .utils ._openmp_helpers import _openmp_effective_n_threads
1617from sklearn .utils ._param_validation import Interval , StrOptions , validate_params
1718from sklearn .utils .validation import (
1819 _check_sample_weight ,
2930 make_time_shift_features ,
3031)
3132from ._narx_fast import ( # type: ignore[attr-defined]
32- _predict_step ,
33- _update_cfd ,
34- _update_terms ,
33+ _predict ,
34+ _update_dydx ,
3535)
3636
3737
@@ -341,7 +341,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
341341 f"({ (n_coef_intercept ,)} ), but got { coef_init .shape } ."
342342 )
343343
344- grad_yyd_ids , grad_coef_ids , grad_feat_ids , grad_delay_ids = NARX ._get_cfd_ids (
344+ grad_yyd_ids , grad_delay_ids , grad_coef_ids , grad_feat_ids = NARX ._get_dcf_ids (
345345 self .feat_ids_ , self .delay_ids_ , self .output_ids_ , X .shape [1 ]
346346 )
347347 sample_weight_sqrt = np .sqrt (sample_weight ).reshape (- 1 , 1 )
@@ -350,7 +350,6 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
350350 x0 = coef_init ,
351351 jac = NARX ._grad ,
352352 args = (
353- _predict_step ,
354353 X ,
355354 y ,
356355 self .feat_ids_ ,
@@ -359,9 +358,9 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
359358 self .fit_intercept ,
360359 sample_weight_sqrt ,
361360 grad_yyd_ids ,
361+ grad_delay_ids ,
362362 grad_coef_ids ,
363363 grad_feat_ids ,
364- grad_delay_ids ,
365364 ),
366365 ** params ,
367366 )
@@ -374,64 +373,17 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
374373 return self
375374
376375 @staticmethod
377- def _predict (
378- expression ,
379- X ,
380- y_ref ,
381- coef ,
382- intercept ,
383- feat_ids ,
384- delay_ids ,
385- output_ids ,
386- ):
387- n_samples = X .shape [0 ]
388- n_ref , n_outputs = y_ref .shape
389- max_delay = np .max (delay_ids )
390- y_hat = np .zeros ((n_samples , n_outputs ), dtype = float )
391- at_init = True
392- init_k = 0
393- for k in range (n_samples ):
394- if not np .all (np .isfinite (X [k ])):
395- at_init = True
396- init_k = k + 1
397- y_hat [k ] = np .nan
398- continue
399- if k - init_k == max_delay :
400- at_init = False
401-
402- if at_init :
403- y_hat [k ] = y_ref [k % n_ref ]
404- if not np .all (np .isfinite (y_hat [k ])):
405- at_init = True
406- init_k = k + 1
407- else :
408- y_hat [k ] = intercept
409- expression (
410- X ,
411- y_hat ,
412- y_hat [k ],
413- coef ,
414- feat_ids ,
415- delay_ids ,
416- output_ids ,
417- k ,
418- )
419- if np .any (y_hat [k ] > 1e20 ):
420- y_hat [k :] = 1e20
421- return y_hat
422- return y_hat
423-
424- @staticmethod
425- def _get_cfd_ids (feat_ids , delay_ids , output_ids , n_features_in ):
376+ def _get_dcf_ids (feat_ids , delay_ids , output_ids , n_features_in ):
426377 """
427- Get ids of CFD (Coef, Feature , and Delay ) matrix to update dyn(k)/dx.
378+ Get ids of DCF (Delay, Coef , and Feature ) matrix to update dyn(k)/dx.
428379 Maps coefficients to their corresponding features and delays.
429380 """
430381
431- # Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay
432- # axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
433- # axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
434- # axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
382+ # Initialize dcf_ids as a list of lists max_delay * n_outputs * n_outputs
383+ # axis-0 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
384+ # axis-1 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
385+ # axis-2 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
386+
435387 grad_yyd_ids = []
436388 grad_coef_ids = []
437389 grad_feat_ids = []
@@ -447,106 +399,20 @@ def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in):
447399 if feat_id >= n_features_in and delay_id > 0 :
448400 col_y_id = feat_id - n_features_in # y(k-1, id)
449401 grad_yyd_ids .append ([row_y_id , col_y_id , delay_id - 1 ])
402+ grad_delay_ids .append (np .delete (term_delay_ids , var_id ))
450403 grad_coef_ids .append (coef_id )
451404 grad_feat_ids .append (np .delete (term_feat_ids , var_id ))
452- grad_delay_ids .append (np .delete (term_delay_ids , var_id ))
453405
454406 return (
455- np .array (grad_yyd_ids , dtype = np .int32 ),
407+ np .array (grad_yyd_ids , dtype = np .int32 , ndmin = 2 ),
408+ np .array (grad_delay_ids , dtype = np .int32 , ndmin = 2 ),
456409 np .array (grad_coef_ids , dtype = np .int32 ),
457- np .array (grad_feat_ids , dtype = np .int32 ),
458- np .array (grad_delay_ids , dtype = np .int32 ),
410+ np .array (grad_feat_ids , dtype = np .int32 , ndmin = 2 ),
459411 )
460412
461- @staticmethod
462- def _update_dydx (
463- X ,
464- y_hat ,
465- coef ,
466- feat_ids ,
467- delay_ids ,
468- output_ids ,
469- fit_intercept ,
470- grad_yyd_ids ,
471- grad_coef_ids ,
472- grad_feat_ids ,
473- grad_delay_ids ,
474- ):
475- """
476- Computation of the Jacobian matrix dydx.
477-
478- Returns
479- -------
480- dydx : ndarray of shape (n_samples, n_outputs, n_x)
481- Jacobian matrix of the outputs with respect to coefficients and intercepts.
482- """
483- n_samples , n_y = y_hat .shape
484- max_delay = np .max (delay_ids )
485- n_coefs = feat_ids .shape [0 ]
486- if fit_intercept :
487- n_x = n_coefs + n_y # Total number of coefficients and intercepts
488- y_ids = np .r_ [output_ids , np .arange (n_y )]
489- else :
490- n_x = n_coefs
491- y_ids = output_ids
492-
493- x_ids = np .arange (n_x )
494-
495- dydx = np .zeros ((n_samples , n_y , n_x ), dtype = float )
496- at_init = True
497- init_k = 0
498- for k in range (n_samples ):
499- if not (np .all (np .isfinite (X [k ])) and np .all (np .isfinite (y_hat [k ]))):
500- at_init = True
501- init_k = k + 1
502- continue
503- if k - init_k == max_delay :
504- at_init = False
505-
506- if at_init :
507- continue
508- # Compute terms for time step k
509- terms = np .ones (n_x , dtype = float )
510- _update_terms (
511- X ,
512- y_hat ,
513- terms ,
514- feat_ids ,
515- delay_ids ,
516- k ,
517- )
518-
519- # Update constant terms of Jacobian
520- dydx [k , y_ids , x_ids ] = terms
521-
522- # Update dynamic terms of Jacobian
523- if max_delay > 0 and grad_yyd_ids .size > 0 :
524- cfd = np .zeros ((n_y , n_y , max_delay ), dtype = float )
525- _update_cfd (
526- X ,
527- y_hat ,
528- cfd ,
529- coef ,
530- grad_yyd_ids ,
531- grad_coef_ids ,
532- grad_feat_ids ,
533- grad_delay_ids ,
534- k ,
535- )
536- for d in range (max_delay ):
537- dydx [k ] += cfd [:, :, d ] @ dydx [k - d - 1 ]
538-
539- # Handle divergence
540- if np .any (dydx [k ] > 1e20 ):
541- dydx [k :] = 1e20
542- break
543-
544- return dydx
545-
546413 @staticmethod
547414 def _loss (
548415 coef_intercept ,
549- expression ,
550416 X ,
551417 y ,
552418 feat_ids ,
@@ -557,23 +423,24 @@ def _loss(
557423 * args ,
558424 ):
559425 # Sum of squared errors
560- n_outputs = y .shape [ 1 ]
426+ n_samples , n_y = y .shape
561427 if fit_intercept :
562- coef = coef_intercept [:- n_outputs ]
563- intercept = coef_intercept [- n_outputs :]
428+ coef = coef_intercept [:- n_y ]
429+ intercept = coef_intercept [- n_y :]
564430 else :
565431 coef = coef_intercept
566- intercept = np .zeros (n_outputs , dtype = float )
432+ intercept = np .zeros (n_y , dtype = float )
567433
568- y_hat = NARX . _predict (
569- expression ,
434+ y_hat = np . zeros (( n_samples , n_y ), dtype = float )
435+ _predict (
570436 X ,
571437 y ,
572438 coef ,
573439 intercept ,
574440 feat_ids ,
575441 delay_ids ,
576442 output_ids ,
443+ y_hat ,
577444 )
578445
579446 y_masked , y_hat_masked , sample_weight_sqrt_masked = mask_missing_values (
@@ -585,7 +452,6 @@ def _loss(
585452 @staticmethod
586453 def _grad (
587454 coef_intercept ,
588- expression ,
589455 X ,
590456 y ,
591457 feat_ids ,
@@ -594,41 +460,57 @@ def _grad(
594460 fit_intercept ,
595461 sample_weight_sqrt ,
596462 grad_yyd_ids ,
463+ grad_delay_ids ,
597464 grad_coef_ids ,
598465 grad_feat_ids ,
599- grad_delay_ids ,
600466 ):
601467 # Sum of squared errors
602- n_outputs = y .shape [ 1 ]
468+ n_samples , n_outputs = y .shape
603469 if fit_intercept :
604470 coef = coef_intercept [:- n_outputs ]
605471 intercept = coef_intercept [- n_outputs :]
606472 else :
607473 coef = coef_intercept
608474 intercept = np .zeros (n_outputs , dtype = float )
609475
610- y_hat = NARX . _predict (
611- expression ,
476+ y_hat = np . zeros (( n_samples , n_outputs ), dtype = float )
477+ _predict (
612478 X ,
613479 y ,
614480 coef ,
615481 intercept ,
616482 feat_ids ,
617483 delay_ids ,
618484 output_ids ,
485+ y_hat ,
619486 )
620- dydx = NARX ._update_dydx (
487+
488+ max_delay = np .max (delay_ids )
489+ n_coefs = feat_ids .shape [0 ]
490+ if fit_intercept :
491+ n_x = n_coefs + n_outputs # Total number of coefficients and intercepts
492+ y_ids = np .r_ [output_ids , np .arange (n_outputs , dtype = np .int32 )]
493+ else :
494+ n_x = n_coefs
495+ y_ids = output_ids
496+ dydx = np .zeros ((n_samples , n_outputs , n_x ), dtype = float )
497+ dcf = np .zeros ((max_delay , n_outputs , n_outputs ), dtype = float )
498+ n_threads = _openmp_effective_n_threads ()
499+
500+ _update_dydx (
621501 X ,
622502 y_hat ,
623503 coef ,
624504 feat_ids ,
625505 delay_ids ,
626- output_ids ,
627- fit_intercept ,
506+ y_ids ,
628507 grad_yyd_ids ,
508+ grad_delay_ids ,
629509 grad_coef_ids ,
630510 grad_feat_ids ,
631- grad_delay_ids ,
511+ dydx ,
512+ dcf ,
513+ n_threads ,
632514 )
633515
634516 mask_valid = mask_missing_values (y , y_hat , sample_weight_sqrt , return_mask = True )
@@ -702,15 +584,16 @@ def predict(self, X, y_init=None):
702584 f"`y_init` should have { self .n_outputs_ } outputs "
703585 f"but got { y_init .shape [1 ]} ."
704586 )
705- y_hat = NARX . _predict (
706- _predict_step ,
587+ y_hat = np . zeros (( X . shape [ 0 ], self . n_outputs_ ), dtype = float )
588+ _predict (
707589 X ,
708590 y_init ,
709591 self .coef_ ,
710592 self .intercept_ ,
711593 self .feat_ids_ ,
712594 self .delay_ids_ ,
713595 self .output_ids_ ,
596+ y_hat ,
714597 )
715598 if self .n_outputs_ == 1 :
716599 y_hat = y_hat .flatten ()
0 commit comments