Skip to content

Commit 48cc650

Browse files
MNT speedup narx grad (#157)
1 parent 55d9e06 commit 48cc650

File tree

6 files changed

+286
-231
lines changed

6 files changed

+286
-231
lines changed

fastcan/narx/_base.py

Lines changed: 50 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
1414
from sklearn.linear_model import LinearRegression
1515
from sklearn.utils import check_array, check_consistent_length, column_or_1d
16+
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
1617
from sklearn.utils._param_validation import Interval, StrOptions, validate_params
1718
from sklearn.utils.validation import (
1819
_check_sample_weight,
@@ -29,9 +30,8 @@
2930
make_time_shift_features,
3031
)
3132
from ._narx_fast import ( # type: ignore[attr-defined]
32-
_predict_step,
33-
_update_cfd,
34-
_update_terms,
33+
_predict,
34+
_update_dydx,
3535
)
3636

3737

@@ -341,7 +341,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
341341
f"({(n_coef_intercept,)}), but got {coef_init.shape}."
342342
)
343343

344-
grad_yyd_ids, grad_coef_ids, grad_feat_ids, grad_delay_ids = NARX._get_cfd_ids(
344+
grad_yyd_ids, grad_delay_ids, grad_coef_ids, grad_feat_ids = NARX._get_dcf_ids(
345345
self.feat_ids_, self.delay_ids_, self.output_ids_, X.shape[1]
346346
)
347347
sample_weight_sqrt = np.sqrt(sample_weight).reshape(-1, 1)
@@ -350,7 +350,6 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
350350
x0=coef_init,
351351
jac=NARX._grad,
352352
args=(
353-
_predict_step,
354353
X,
355354
y,
356355
self.feat_ids_,
@@ -359,9 +358,9 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
359358
self.fit_intercept,
360359
sample_weight_sqrt,
361360
grad_yyd_ids,
361+
grad_delay_ids,
362362
grad_coef_ids,
363363
grad_feat_ids,
364-
grad_delay_ids,
365364
),
366365
**params,
367366
)
@@ -374,64 +373,17 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
374373
return self
375374

376375
@staticmethod
377-
def _predict(
378-
expression,
379-
X,
380-
y_ref,
381-
coef,
382-
intercept,
383-
feat_ids,
384-
delay_ids,
385-
output_ids,
386-
):
387-
n_samples = X.shape[0]
388-
n_ref, n_outputs = y_ref.shape
389-
max_delay = np.max(delay_ids)
390-
y_hat = np.zeros((n_samples, n_outputs), dtype=float)
391-
at_init = True
392-
init_k = 0
393-
for k in range(n_samples):
394-
if not np.all(np.isfinite(X[k])):
395-
at_init = True
396-
init_k = k + 1
397-
y_hat[k] = np.nan
398-
continue
399-
if k - init_k == max_delay:
400-
at_init = False
401-
402-
if at_init:
403-
y_hat[k] = y_ref[k % n_ref]
404-
if not np.all(np.isfinite(y_hat[k])):
405-
at_init = True
406-
init_k = k + 1
407-
else:
408-
y_hat[k] = intercept
409-
expression(
410-
X,
411-
y_hat,
412-
y_hat[k],
413-
coef,
414-
feat_ids,
415-
delay_ids,
416-
output_ids,
417-
k,
418-
)
419-
if np.any(y_hat[k] > 1e20):
420-
y_hat[k:] = 1e20
421-
return y_hat
422-
return y_hat
423-
424-
@staticmethod
425-
def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in):
376+
def _get_dcf_ids(feat_ids, delay_ids, output_ids, n_features_in):
426377
"""
427-
Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx.
378+
Get ids of DCF (Delay, Coef, and Feature) matrix to update dyn(k)/dx.
428379
Maps coefficients to their corresponding features and delays.
429380
"""
430381

431-
# Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay
432-
# axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
433-
# axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
434-
# axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
382+
# Initialize dcf_ids as a list of lists max_delay * n_outputs * n_outputs
383+
# axis-0 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
384+
# axis-1 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
385+
# axis-2 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
386+
435387
grad_yyd_ids = []
436388
grad_coef_ids = []
437389
grad_feat_ids = []
@@ -447,106 +399,20 @@ def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in):
447399
if feat_id >= n_features_in and delay_id > 0:
448400
col_y_id = feat_id - n_features_in # y(k-1, id)
449401
grad_yyd_ids.append([row_y_id, col_y_id, delay_id - 1])
402+
grad_delay_ids.append(np.delete(term_delay_ids, var_id))
450403
grad_coef_ids.append(coef_id)
451404
grad_feat_ids.append(np.delete(term_feat_ids, var_id))
452-
grad_delay_ids.append(np.delete(term_delay_ids, var_id))
453405

454406
return (
455-
np.array(grad_yyd_ids, dtype=np.int32),
407+
np.array(grad_yyd_ids, dtype=np.int32, ndmin=2),
408+
np.array(grad_delay_ids, dtype=np.int32, ndmin=2),
456409
np.array(grad_coef_ids, dtype=np.int32),
457-
np.array(grad_feat_ids, dtype=np.int32),
458-
np.array(grad_delay_ids, dtype=np.int32),
410+
np.array(grad_feat_ids, dtype=np.int32, ndmin=2),
459411
)
460412

461-
@staticmethod
462-
def _update_dydx(
463-
X,
464-
y_hat,
465-
coef,
466-
feat_ids,
467-
delay_ids,
468-
output_ids,
469-
fit_intercept,
470-
grad_yyd_ids,
471-
grad_coef_ids,
472-
grad_feat_ids,
473-
grad_delay_ids,
474-
):
475-
"""
476-
Computation of the Jacobian matrix dydx.
477-
478-
Returns
479-
-------
480-
dydx : ndarray of shape (n_samples, n_outputs, n_x)
481-
Jacobian matrix of the outputs with respect to coefficients and intercepts.
482-
"""
483-
n_samples, n_y = y_hat.shape
484-
max_delay = np.max(delay_ids)
485-
n_coefs = feat_ids.shape[0]
486-
if fit_intercept:
487-
n_x = n_coefs + n_y # Total number of coefficients and intercepts
488-
y_ids = np.r_[output_ids, np.arange(n_y)]
489-
else:
490-
n_x = n_coefs
491-
y_ids = output_ids
492-
493-
x_ids = np.arange(n_x)
494-
495-
dydx = np.zeros((n_samples, n_y, n_x), dtype=float)
496-
at_init = True
497-
init_k = 0
498-
for k in range(n_samples):
499-
if not (np.all(np.isfinite(X[k])) and np.all(np.isfinite(y_hat[k]))):
500-
at_init = True
501-
init_k = k + 1
502-
continue
503-
if k - init_k == max_delay:
504-
at_init = False
505-
506-
if at_init:
507-
continue
508-
# Compute terms for time step k
509-
terms = np.ones(n_x, dtype=float)
510-
_update_terms(
511-
X,
512-
y_hat,
513-
terms,
514-
feat_ids,
515-
delay_ids,
516-
k,
517-
)
518-
519-
# Update constant terms of Jacobian
520-
dydx[k, y_ids, x_ids] = terms
521-
522-
# Update dynamic terms of Jacobian
523-
if max_delay > 0 and grad_yyd_ids.size > 0:
524-
cfd = np.zeros((n_y, n_y, max_delay), dtype=float)
525-
_update_cfd(
526-
X,
527-
y_hat,
528-
cfd,
529-
coef,
530-
grad_yyd_ids,
531-
grad_coef_ids,
532-
grad_feat_ids,
533-
grad_delay_ids,
534-
k,
535-
)
536-
for d in range(max_delay):
537-
dydx[k] += cfd[:, :, d] @ dydx[k - d - 1]
538-
539-
# Handle divergence
540-
if np.any(dydx[k] > 1e20):
541-
dydx[k:] = 1e20
542-
break
543-
544-
return dydx
545-
546413
@staticmethod
547414
def _loss(
548415
coef_intercept,
549-
expression,
550416
X,
551417
y,
552418
feat_ids,
@@ -557,23 +423,24 @@ def _loss(
557423
*args,
558424
):
559425
# Sum of squared errors
560-
n_outputs = y.shape[1]
426+
n_samples, n_y = y.shape
561427
if fit_intercept:
562-
coef = coef_intercept[:-n_outputs]
563-
intercept = coef_intercept[-n_outputs:]
428+
coef = coef_intercept[:-n_y]
429+
intercept = coef_intercept[-n_y:]
564430
else:
565431
coef = coef_intercept
566-
intercept = np.zeros(n_outputs, dtype=float)
432+
intercept = np.zeros(n_y, dtype=float)
567433

568-
y_hat = NARX._predict(
569-
expression,
434+
y_hat = np.zeros((n_samples, n_y), dtype=float)
435+
_predict(
570436
X,
571437
y,
572438
coef,
573439
intercept,
574440
feat_ids,
575441
delay_ids,
576442
output_ids,
443+
y_hat,
577444
)
578445

579446
y_masked, y_hat_masked, sample_weight_sqrt_masked = mask_missing_values(
@@ -585,7 +452,6 @@ def _loss(
585452
@staticmethod
586453
def _grad(
587454
coef_intercept,
588-
expression,
589455
X,
590456
y,
591457
feat_ids,
@@ -594,41 +460,57 @@ def _grad(
594460
fit_intercept,
595461
sample_weight_sqrt,
596462
grad_yyd_ids,
463+
grad_delay_ids,
597464
grad_coef_ids,
598465
grad_feat_ids,
599-
grad_delay_ids,
600466
):
601467
# Sum of squared errors
602-
n_outputs = y.shape[1]
468+
n_samples, n_outputs = y.shape
603469
if fit_intercept:
604470
coef = coef_intercept[:-n_outputs]
605471
intercept = coef_intercept[-n_outputs:]
606472
else:
607473
coef = coef_intercept
608474
intercept = np.zeros(n_outputs, dtype=float)
609475

610-
y_hat = NARX._predict(
611-
expression,
476+
y_hat = np.zeros((n_samples, n_outputs), dtype=float)
477+
_predict(
612478
X,
613479
y,
614480
coef,
615481
intercept,
616482
feat_ids,
617483
delay_ids,
618484
output_ids,
485+
y_hat,
619486
)
620-
dydx = NARX._update_dydx(
487+
488+
max_delay = np.max(delay_ids)
489+
n_coefs = feat_ids.shape[0]
490+
if fit_intercept:
491+
n_x = n_coefs + n_outputs # Total number of coefficients and intercepts
492+
y_ids = np.r_[output_ids, np.arange(n_outputs, dtype=np.int32)]
493+
else:
494+
n_x = n_coefs
495+
y_ids = output_ids
496+
dydx = np.zeros((n_samples, n_outputs, n_x), dtype=float)
497+
dcf = np.zeros((max_delay, n_outputs, n_outputs), dtype=float)
498+
n_threads = _openmp_effective_n_threads()
499+
500+
_update_dydx(
621501
X,
622502
y_hat,
623503
coef,
624504
feat_ids,
625505
delay_ids,
626-
output_ids,
627-
fit_intercept,
506+
y_ids,
628507
grad_yyd_ids,
508+
grad_delay_ids,
629509
grad_coef_ids,
630510
grad_feat_ids,
631-
grad_delay_ids,
511+
dydx,
512+
dcf,
513+
n_threads,
632514
)
633515

634516
mask_valid = mask_missing_values(y, y_hat, sample_weight_sqrt, return_mask=True)
@@ -702,15 +584,16 @@ def predict(self, X, y_init=None):
702584
f"`y_init` should have {self.n_outputs_} outputs "
703585
f"but got {y_init.shape[1]}."
704586
)
705-
y_hat = NARX._predict(
706-
_predict_step,
587+
y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=float)
588+
_predict(
707589
X,
708590
y_init,
709591
self.coef_,
710592
self.intercept_,
711593
self.feat_ids_,
712594
self.delay_ids_,
713595
self.output_ids_,
596+
y_hat,
714597
)
715598
if self.n_outputs_ == 1:
716599
y_hat = y_hat.flatten()

0 commit comments

Comments
 (0)