diff --git a/fastcan/narx/_base.py b/fastcan/narx/_base.py index 59d4080..fe95b83 100644 --- a/fastcan/narx/_base.py +++ b/fastcan/narx/_base.py @@ -13,6 +13,7 @@ from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin from sklearn.linear_model import LinearRegression from sklearn.utils import check_array, check_consistent_length, column_or_1d +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads from sklearn.utils._param_validation import Interval, StrOptions, validate_params from sklearn.utils.validation import ( _check_sample_weight, @@ -29,9 +30,8 @@ make_time_shift_features, ) from ._narx_fast import ( # type: ignore[attr-defined] - _predict_step, - _update_cfd, - _update_terms, + _predict, + _update_dydx, ) @@ -341,7 +341,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): f"({(n_coef_intercept,)}), but got {coef_init.shape}." ) - grad_yyd_ids, grad_coef_ids, grad_feat_ids, grad_delay_ids = NARX._get_cfd_ids( + grad_yyd_ids, grad_delay_ids, grad_coef_ids, grad_feat_ids = NARX._get_dcf_ids( self.feat_ids_, self.delay_ids_, self.output_ids_, X.shape[1] ) sample_weight_sqrt = np.sqrt(sample_weight).reshape(-1, 1) @@ -350,7 +350,6 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): x0=coef_init, jac=NARX._grad, args=( - _predict_step, X, y, self.feat_ids_, @@ -359,9 +358,9 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): self.fit_intercept, sample_weight_sqrt, grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ), **params, ) @@ -374,64 +373,17 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): return self @staticmethod - def _predict( - expression, - X, - y_ref, - coef, - intercept, - feat_ids, - delay_ids, - output_ids, - ): - n_samples = X.shape[0] - n_ref, n_outputs = y_ref.shape - max_delay = np.max(delay_ids) - y_hat = np.zeros((n_samples, n_outputs), dtype=float) - at_init = True - init_k = 0 - for k in range(n_samples): - if not np.all(np.isfinite(X[k])): - at_init = True - init_k = k + 1 - y_hat[k] = np.nan - continue - if k - init_k == max_delay: - at_init = False - - if at_init: - y_hat[k] = y_ref[k % n_ref] - if not np.all(np.isfinite(y_hat[k])): - at_init = True - init_k = k + 1 - else: - y_hat[k] = intercept - expression( - X, - y_hat, - y_hat[k], - coef, - feat_ids, - delay_ids, - output_ids, - k, - ) - if np.any(y_hat[k] > 1e20): - y_hat[k:] = 1e20 - return y_hat - return y_hat - - @staticmethod - def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in): + def _get_dcf_ids(feat_ids, delay_ids, output_ids, n_features_in): """ - Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx. + Get ids of DCF (Delay, Coef, and Feature) matrix to update dyn(k)/dx. Maps coefficients to their corresponding features and delays. """ - # Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay - # axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx] - # axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx] - # axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx] + # Initialize dcf_ids as a list of lists max_delay * n_outputs * n_outputs + # axis-0 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx] + # axis-1 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx] + # axis-2 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx] + grad_yyd_ids = [] grad_coef_ids = [] grad_feat_ids = [] @@ -447,106 +399,20 @@ def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in): if feat_id >= n_features_in and delay_id > 0: col_y_id = feat_id - n_features_in # y(k-1, id) grad_yyd_ids.append([row_y_id, col_y_id, delay_id - 1]) + grad_delay_ids.append(np.delete(term_delay_ids, var_id)) grad_coef_ids.append(coef_id) grad_feat_ids.append(np.delete(term_feat_ids, var_id)) - grad_delay_ids.append(np.delete(term_delay_ids, var_id)) return ( - np.array(grad_yyd_ids, dtype=np.int32), + np.array(grad_yyd_ids, dtype=np.int32, ndmin=2), + np.array(grad_delay_ids, dtype=np.int32, ndmin=2), np.array(grad_coef_ids, dtype=np.int32), - np.array(grad_feat_ids, dtype=np.int32), - np.array(grad_delay_ids, dtype=np.int32), + np.array(grad_feat_ids, dtype=np.int32, ndmin=2), ) - @staticmethod - def _update_dydx( - X, - y_hat, - coef, - feat_ids, - delay_ids, - output_ids, - fit_intercept, - grad_yyd_ids, - grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - ): - """ - Computation of the Jacobian matrix dydx. - - Returns - ------- - dydx : ndarray of shape (n_samples, n_outputs, n_x) - Jacobian matrix of the outputs with respect to coefficients and intercepts. - """ - n_samples, n_y = y_hat.shape - max_delay = np.max(delay_ids) - n_coefs = feat_ids.shape[0] - if fit_intercept: - n_x = n_coefs + n_y # Total number of coefficients and intercepts - y_ids = np.r_[output_ids, np.arange(n_y)] - else: - n_x = n_coefs - y_ids = output_ids - - x_ids = np.arange(n_x) - - dydx = np.zeros((n_samples, n_y, n_x), dtype=float) - at_init = True - init_k = 0 - for k in range(n_samples): - if not (np.all(np.isfinite(X[k])) and np.all(np.isfinite(y_hat[k]))): - at_init = True - init_k = k + 1 - continue - if k - init_k == max_delay: - at_init = False - - if at_init: - continue - # Compute terms for time step k - terms = np.ones(n_x, dtype=float) - _update_terms( - X, - y_hat, - terms, - feat_ids, - delay_ids, - k, - ) - - # Update constant terms of Jacobian - dydx[k, y_ids, x_ids] = terms - - # Update dynamic terms of Jacobian - if max_delay > 0 and grad_yyd_ids.size > 0: - cfd = np.zeros((n_y, n_y, max_delay), dtype=float) - _update_cfd( - X, - y_hat, - cfd, - coef, - grad_yyd_ids, - grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - k, - ) - for d in range(max_delay): - dydx[k] += cfd[:, :, d] @ dydx[k - d - 1] - - # Handle divergence - if np.any(dydx[k] > 1e20): - dydx[k:] = 1e20 - break - - return dydx - @staticmethod def _loss( coef_intercept, - expression, X, y, feat_ids, @@ -557,16 +423,16 @@ def _loss( *args, ): # Sum of squared errors - n_outputs = y.shape[1] + n_samples, n_y = y.shape if fit_intercept: - coef = coef_intercept[:-n_outputs] - intercept = coef_intercept[-n_outputs:] + coef = coef_intercept[:-n_y] + intercept = coef_intercept[-n_y:] else: coef = coef_intercept - intercept = np.zeros(n_outputs, dtype=float) + intercept = np.zeros(n_y, dtype=float) - y_hat = NARX._predict( - expression, + y_hat = np.zeros((n_samples, n_y), dtype=float) + _predict( X, y, coef, @@ -574,6 +440,7 @@ def _loss( feat_ids, delay_ids, output_ids, + y_hat, ) y_masked, y_hat_masked, sample_weight_sqrt_masked = mask_missing_values( @@ -585,7 +452,6 @@ def _loss( @staticmethod def _grad( coef_intercept, - expression, X, y, feat_ids, @@ -594,12 +460,12 @@ def _grad( fit_intercept, sample_weight_sqrt, grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ): # Sum of squared errors - n_outputs = y.shape[1] + n_samples, n_outputs = y.shape if fit_intercept: coef = coef_intercept[:-n_outputs] intercept = coef_intercept[-n_outputs:] @@ -607,8 +473,8 @@ def _grad( coef = coef_intercept intercept = np.zeros(n_outputs, dtype=float) - y_hat = NARX._predict( - expression, + y_hat = np.zeros((n_samples, n_outputs), dtype=float) + _predict( X, y, coef, @@ -616,19 +482,35 @@ def _grad( feat_ids, delay_ids, output_ids, + y_hat, ) - dydx = NARX._update_dydx( + + max_delay = np.max(delay_ids) + n_coefs = feat_ids.shape[0] + if fit_intercept: + n_x = n_coefs + n_outputs # Total number of coefficients and intercepts + y_ids = np.r_[output_ids, np.arange(n_outputs, dtype=np.int32)] + else: + n_x = n_coefs + y_ids = output_ids + dydx = np.zeros((n_samples, n_outputs, n_x), dtype=float) + dcf = np.zeros((max_delay, n_outputs, n_outputs), dtype=float) + n_threads = _openmp_effective_n_threads() + + _update_dydx( X, y_hat, coef, feat_ids, delay_ids, - output_ids, - fit_intercept, + y_ids, grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, + dydx, + dcf, + n_threads, ) mask_valid = mask_missing_values(y, y_hat, sample_weight_sqrt, return_mask=True) @@ -702,8 +584,8 @@ def predict(self, X, y_init=None): f"`y_init` should have {self.n_outputs_} outputs " f"but got {y_init.shape[1]}." ) - y_hat = NARX._predict( - _predict_step, + y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=float) + _predict( X, y_init, self.coef_, @@ -711,6 +593,7 @@ def predict(self, X, y_init=None): self.feat_ids_, self.delay_ids_, self.output_ids_, + y_hat, ) if self.n_outputs_ == 1: y_hat = y_hat.flatten() diff --git a/fastcan/narx/_narx_fast.pyx b/fastcan/narx/_narx_fast.pyx index 5543f0b..0555ff3 100644 --- a/fastcan/narx/_narx_fast.pyx +++ b/fastcan/narx/_narx_fast.pyx @@ -4,24 +4,30 @@ Fast computation of prediction and gradient for narx. # Authors: The fastcan developers # SPDX-License-Identifier: MIT -from cython cimport floating, final +from libc.stdlib cimport malloc, free +from libc.string cimport memset +from cython cimport final +import numpy as np +from sklearn.utils._cython_blas cimport RowMajor, NoTrans +from sklearn.utils._cython_blas cimport _gemm @final -cpdef void _update_terms( - const floating[:, ::1] X, # IN - const floating[:, ::1] y_hat, # IN - floating[::1] terms, # OUT +cdef inline void _update_terms( + const double[:, ::1] X, # IN + const double[:, ::1] y_hat, # IN + double* terms, # OUT const int[:, ::1] feat_ids, # IN const int[:, ::1] delay_ids, # IN const int k, # IN + const int num_threads, # IN ) noexcept nogil: """ Evaluate all terms for the given features and delays at timestep k. """ cdef: - int i - int n_coefs = feat_ids.shape[0] + Py_ssize_t i + Py_ssize_t n_coefs = feat_ids.shape[0] for i in range(n_coefs): terms[i] = _evaluate_term( @@ -30,11 +36,11 @@ cpdef void _update_terms( @final -cpdef void _predict_step( - const floating[:, ::1] X, # IN - const floating[:, ::1] y_hat, # IN - floating[::1] y_pred, # OUT - const floating[::1] coef, # IN +cdef inline void _predict_step( + const double[:, ::1] X, # IN + const double[:, ::1] y_hat, # IN + double[::1] y_pred, # OUT + const double[::1] coef, # IN const int[:, ::1] feat_ids, # IN const int[:, ::1] delay_ids, # IN const int[::1] output_ids, # IN @@ -44,11 +50,11 @@ cpdef void _predict_step( Evaluate the expression for all outputs at timestep k. """ cdef: - int n_terms = feat_ids.shape[0] - int i, output_i + Py_ssize_t n_coefs = feat_ids.shape[0] + Py_ssize_t i, output_i # Add all terms - for i in range(n_terms): + for i in range(n_coefs): output_i = output_ids[i] y_pred[output_i] += coef[i] * _evaluate_term( X, y_hat, feat_ids[i], delay_ids[i], k @@ -56,23 +62,23 @@ cpdef void _predict_step( @final -cdef floating _evaluate_term( - const floating[:, ::1] X, # IN - const floating[:, ::1] y_hat, # IN - const int[::1] feat_ids, # IN - const int[::1] delay_ids, # IN +cdef inline double _evaluate_term( + const double[:, ::1] X, # IN + const double[:, ::1] y_hat, # IN + const int[::1] feat_ids, # IN + const int[::1] delay_ids, # IN const int k, # IN ) noexcept nogil: """ Evaluate a term based on feature and delay IDs. """ cdef: - int n_feats = X.shape[1] - int n_vars = feat_ids.shape[0] - floating term = 1.0 - int i, feat_id + Py_ssize_t n_feats = X.shape[1] + Py_ssize_t n_coefs = feat_ids.shape[0] + double term = 1.0 + Py_ssize_t i, feat_id - for i in range(n_vars): + for i in range(n_coefs): feat_id = feat_ids[i] if feat_id != -1: if feat_id < n_feats: @@ -84,30 +90,193 @@ cdef floating _evaluate_term( @final -cpdef void _update_cfd( - const floating[:, ::1] X, # IN - const floating[:, ::1] y_hat, # IN - floating[:, :, ::1] cfd, # OUT - const floating[::1] coef, # IN +cdef inline void _update_dcf( + const double[:, ::1] X, # IN + const double[:, ::1] y_hat, # IN + double[:, :, ::1] dcf, # OUT + const double[::1] coef, # IN const int[:, ::1] grad_yyd_ids, # IN + const int[:, ::1] grad_delay_ids, # IN const int[::1] grad_coef_ids, # IN const int[:, ::1] grad_feat_ids, # IN - const int[:, ::1] grad_delay_ids, # IN const int k, # IN ) noexcept nogil: """ - Updates CFD matrix based on the current state. + Updates DCF matrix based on the current state. """ cdef: - int n_grad_terms = grad_yyd_ids.shape[0] - int i, row_y_id, col_y_id, delay_id_1 + Py_ssize_t n_grad_terms = grad_yyd_ids.shape[0] + Py_ssize_t i, row_y_id, col_y_id, delay_id_1 + + memset(&dcf[0, 0, 0], 0, dcf.shape[0] * dcf.shape[1] * dcf.shape[2] * sizeof(double)) for i in range(n_grad_terms): row_y_id = grad_yyd_ids[i, 0] col_y_id = grad_yyd_ids[i, 1] delay_id_1 = grad_yyd_ids[i, 2] - cfd[row_y_id, col_y_id, delay_id_1] += coef[grad_coef_ids[i]] * \ + dcf[delay_id_1, row_y_id, col_y_id] += coef[grad_coef_ids[i]] * \ _evaluate_term( X, y_hat, grad_feat_ids[i], grad_delay_ids[i], k ) + + +@final +cpdef void _predict( + const double[:, ::1] X, # IN + const double[:, ::1] y_ref, # IN + const double[::1] coef, # IN + const double[::1] intercept, # IN + const int[:, ::1] feat_ids, # IN + const int[:, ::1] delay_ids, # IN + const int[::1] output_ids, # IN + double[:, ::1] y_hat, # OUT +) noexcept nogil: + """ + Vectorized (Cython) variant of Python NARX._predict. + Returns y_hat array (n_samples, n_outputs). + """ + cdef: + Py_ssize_t max_delay + with gil: + max_delay = np.max(delay_ids) + cdef: + Py_ssize_t n_samples = X.shape[0] + Py_ssize_t n_ref = y_ref.shape[0] + Py_ssize_t k + Py_ssize_t init_k = 0 + bint at_init = True + + for k in range(n_samples): + with gil: + if not np.all(np.isfinite(X[k])): + at_init = True + init_k = k + 1 + y_hat[k] = np.nan + continue + if k - init_k == max_delay: + at_init = False + + if at_init: + y_hat[k] = y_ref[k % n_ref] + with gil: + if not np.all(np.isfinite(y_hat[k])): + at_init = True + init_k = k + 1 + else: + y_hat[k] = intercept + _predict_step( + X, + y_hat, + y_hat[k], + coef, + feat_ids, + delay_ids, + output_ids, + k, + ) + # Handle divergence + with gil: + if np.max(np.abs(y_hat[k])) > 1e20: + break + + +@final +cpdef void _update_dydx( + const double[:, ::1] X, + const double[:, ::1] y_hat, + const double[::1] coef, + const int[:, ::1] feat_ids, + const int[:, ::1] delay_ids, + const int[::1] y_ids, + const int[:, ::1] grad_yyd_ids, + const int[:, ::1] grad_delay_ids, + const int[::1] grad_coef_ids, + const int[:, ::1] grad_feat_ids, + double[:, :, ::1] dydx, # OUT + double[:, :, ::1] dcf, # OUT + const int num_threads, # IN +) noexcept nogil: + """ + Computation of the Jacobian matrix dydx. + + Returns + ------- + dydx : ndarray of shape (n_samples, n_outputs, n_x) + Jacobian matrix of the outputs with respect to coefficients and intercepts. + """ + cdef Py_ssize_t max_delay + cdef bint not_empty + with gil: + max_delay = np.max(delay_ids) + not_empty = max_delay > 0 and grad_yyd_ids.size > 0 + cdef: + Py_ssize_t n_samples = y_hat.shape[0] + Py_ssize_t n_coefs = feat_ids.shape[0] + Py_ssize_t k, i, d + Py_ssize_t M = dcf.shape[1] # n_outputs + Py_ssize_t N = dydx.shape[2] # n_x + Py_ssize_t init_k = 0 + bint at_init = True + bint is_finite + double* terms_intercepts = malloc(sizeof(double) * N) + + # Set intercepts + for i in range(n_coefs, N): + terms_intercepts[i] = 1.0 + + for k in range(n_samples): + with gil: + is_finite = np.all(np.isfinite(X[k])) and np.all(np.isfinite(y_hat[k])) + if not is_finite: + at_init = True + init_k = k + 1 + continue + if k - init_k == max_delay: + at_init = False + + if at_init: + continue + # Compute terms for time step k (no effect on intercepts) + _update_terms( + X, + y_hat, + terms_intercepts, + feat_ids, + delay_ids, + k, + num_threads, + ) + + # Update constant terms of Jacobian + for i in range(N): + dydx[k, y_ids[i], i] = terms_intercepts[i] + + # Update dynamic terms of Jacobian + if not_empty: + _update_dcf( + X, + y_hat, + dcf, + coef, + grad_yyd_ids, + grad_delay_ids, + grad_coef_ids, + grad_feat_ids, + k, + ) + for d in range(max_delay): + # dydx[k] += dcf[d] @ dydx[k-d-1] + # dcf[d] (M,M), dydx[k] (M,N) + _gemm( + RowMajor, NoTrans, NoTrans, + M, N, M, + 1.0, &dcf[d, 0, 0], M, &dydx[k-d-1, 0, 0], N, + 1.0, &dydx[k, 0, 0], N, + ) + + # Handle divergence + with gil: + if np.max(np.abs(dydx[k])) > 1e20: + break + free(terms_intercepts) diff --git a/fastcan/narx/tests/test_narx.py b/fastcan/narx/tests/test_narx.py index 30be481..a54cea8 100644 --- a/fastcan/narx/tests/test_narx.py +++ b/fastcan/narx/tests/test_narx.py @@ -453,7 +453,8 @@ def test_divergence(): narx = make_narx(X, y, 3, 3, 2) narx.fit(X, y, coef_init=[-10, 0, 0, 0]) y_hat = narx.predict(X, y) - assert np.all(y_hat <= 1e20) + div_idx = np.where(np.abs(y_hat) > 1e20)[0][0] + 1 + assert np.all(y_hat[div_idx:] == 0) def test_tp2fd(): diff --git a/fastcan/narx/tests/test_narx_jac.py b/fastcan/narx/tests/test_narx_jac.py index 91c81d3..87bc873 100644 --- a/fastcan/narx/tests/test_narx_jac.py +++ b/fastcan/narx/tests/test_narx_jac.py @@ -6,7 +6,7 @@ from sklearn.metrics import r2_score from fastcan.narx import NARX, make_narx -from fastcan.narx._narx_fast import _predict_step # type: ignore[attr-defined] +from fastcan.narx._narx_fast import _predict # type: ignore[attr-defined] def test_simple(): @@ -26,8 +26,8 @@ def test_simple(): intercept = np.array([1], dtype=float) sample_weight = np.array([1, 1, 1], dtype=float).reshape(-1, 1) - y_hat = NARX._predict( - _predict_step, + y_hat = np.zeros_like(y, dtype=float) + _predict( X=X, y_ref=y, coef=coef, @@ -35,6 +35,7 @@ def test_simple(): feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids, + y_hat=y_hat, ) assert_array_equal(y_hat, y) @@ -42,8 +43,8 @@ def test_simple(): delta_w = 0.00001 coef_1 = np.array([0.4 + delta_w, 1]) - y_hat_1 = NARX._predict( - _predict_step, + y_hat_1 = np.zeros_like(y, dtype=float) + _predict( X=X, y_ref=y, coef=coef_1, @@ -51,6 +52,7 @@ def test_simple(): feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids, + y_hat=y_hat_1, ) grad_truth = np.array( @@ -65,12 +67,11 @@ def test_simple(): ] ) - grad_yyd_ids, grad_coef_ids, grad_feat_ids, grad_delay_ids = NARX._get_cfd_ids( + grad_yyd_ids, grad_delay_ids, grad_coef_ids, grad_feat_ids = NARX._get_dcf_ids( feat_ids, delay_ids, output_ids, 1 ) grad = NARX._grad( np.r_[coef_1, intercept], - _predict_step, X, y, feat_ids, @@ -79,15 +80,14 @@ def test_simple(): True, np.sqrt(sample_weight), grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ) assert_almost_equal(grad.sum(axis=0), grad_truth, decimal=4) grad_0 = NARX._grad( np.r_[coef_1, 0], - _predict_step, X, y, feat_ids, @@ -96,13 +96,12 @@ def test_simple(): True, np.sqrt(sample_weight), grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ) grad = NARX._grad( coef_1, - _predict_step, X, y, feat_ids, @@ -111,9 +110,9 @@ def test_simple(): False, np.sqrt(sample_weight), grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ) assert_almost_equal(grad.sum(axis=0), grad_0.sum(axis=0)[:-1]) @@ -198,12 +197,11 @@ def test_complex(): intercept = np.array([1, 0.5]) # NARX Jacobian - grad_yyd_ids, grad_coef_ids, grad_feat_ids, grad_delay_ids = NARX._get_cfd_ids( + grad_yyd_ids, grad_delay_ids, grad_coef_ids, grad_feat_ids = NARX._get_dcf_ids( feat_ids, delay_ids, output_ids, X.shape[1] ) grad = NARX._grad( np.r_[coef, intercept], - _predict_step, X, y, feat_ids, @@ -212,14 +210,14 @@ def test_complex(): True, np.sqrt(np.ones((y.shape[0], 1))), grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ) # Numerical gradient - y_hat_0 = NARX._predict( - _predict_step, + y_hat_0 = np.zeros_like(y, dtype=float) + _predict( X=X, y_ref=y, coef=coef, @@ -227,6 +225,7 @@ def test_complex(): feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids, + y_hat=y_hat_0, ) e_0 = y_hat_0 - y @@ -241,8 +240,8 @@ def test_complex(): intercept_1 = np.copy(intercept) intercept_1[i - len(coef)] += delta_w - y_hat_1 = NARX._predict( - _predict_step, + y_hat_1 = np.zeros_like(y, dtype=float) + _predict( X=X, y_ref=y, coef=coef_1, @@ -250,6 +249,7 @@ def test_complex(): feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids, + y_hat=y_hat_1, ) e_1 = y_hat_1 - y @@ -259,7 +259,6 @@ def test_complex(): grad = NARX._grad( coef, - _predict_step, X, y, feat_ids, @@ -268,19 +267,20 @@ def test_complex(): False, np.sqrt(np.ones((y.shape[0], 1))), grad_yyd_ids, + grad_delay_ids, grad_coef_ids, grad_feat_ids, - grad_delay_ids, ) - y_hat_0 = NARX._predict( - _predict_step, + y_hat_0 = np.zeros_like(y, dtype=float) + _predict( X=X, y_ref=y, coef=coef, - intercept=[0, 0], + intercept=np.array([0, 0], dtype=float), feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids, + y_hat=y_hat_0, ) e_0 = y_hat_0 - y @@ -288,15 +288,16 @@ def test_complex(): coef_1 = np.copy(coef) coef_1[i] += delta_w - y_hat_1 = NARX._predict( - _predict_step, + y_hat_1 = np.zeros_like(y, dtype=float) + _predict( X=X, y_ref=y, coef=coef_1, - intercept=[0, 0], + intercept=np.array([0, 0], dtype=float), feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids, + y_hat=y_hat_1, ) e_1 = y_hat_1 - y diff --git a/pixi.lock b/pixi.lock index 1a4a263..5ae2194 100644 --- a/pixi.lock +++ b/pixi.lock @@ -6125,7 +6125,7 @@ packages: - pypi: ./ name: fastcan version: 0.4.0 - sha256: 335c7da2d4dbd3de173152f1fbabd574555bd732fce56793455418b723b01054 + sha256: 2a742d953b335411ce634a3753dbbbb11d5db7c54fee7d51fec056f5b77c9bf1 requires_dist: - scikit-learn>=1.7.0,!=1.7.1 - furo ; extra == 'docs' diff --git a/pyproject.toml b/pyproject.toml index a68e298..b386d6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,7 @@ time-h = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import Fast time-eta = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(3000, 100); y = np.random.rand(3000, 20)' 's = FastCan(100, eta=True, verbose=0).fit(X, y)'" profile-minibatch = { cmd = '''python -c "import cProfile; import numpy as np; from fastcan import minibatch; X = np.random.rand(100, 3000); y = np.random.rand(100, 20); cProfile.run('minibatch(X, y, 1000, 10, verbose=0)', sort='$SORT')"''', env = { SORT = "cumtime" } } time-narx = '''python -m timeit -n 1 -s "import numpy as np; from fastcan.narx import make_narx; rng = np.random.default_rng(5); X = rng.random((1000, 10)); y = rng.random((1000, 2)); m = make_narx(X, y, 10, max_delay=2, poly_degree=2, verbose=0)" "m.fit(X, y, coef_init='one_step_ahead', verbose=1)"''' +profile-narx = { cmd = '''python -c "import cProfile; import numpy as np; from fastcan.narx import make_narx; X = np.random.rand(3000, 3); y = np.random.rand(3000, 3); m = make_narx(X, y, 10, max_delay=10, poly_degree=2, verbose=0); cProfile.run('m.fit(X, y, coef_init=[0]*33)', sort='$SORT')"''', env = { SORT = "tottime" } } [tool.pixi.feature.asv.tasks] asv-build = { cmd = "python -m asv machine --machine $MACHINE --yes && python -m asv run --show-stderr -v --machine $MACHINE $EXTRA_ARGS", cwd = "asv_benchmarks", env = { MACHINE = "MacOS-M1", EXTRA_ARGS = "" } }