diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml index d231b72..3cb6852 100644 --- a/.github/workflows/emscripten.yml +++ b/.github/workflows/emscripten.yml @@ -9,7 +9,7 @@ jobs: steps: - uses: actions/checkout@v5 - name: Build WASM wheel - uses: pypa/cibuildwheel@v3.2.1 + uses: pypa/cibuildwheel@v3.3.0 env: CIBW_PLATFORM: pyodide - name: Upload package diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 01ea75e..457d38d 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -33,7 +33,7 @@ jobs: steps: - uses: actions/checkout@v5 - name: Build wheels - uses: pypa/cibuildwheel@v3.2.1 + uses: pypa/cibuildwheel@v3.3.0 env: CIBW_SKIP: "*_i686 *_ppc64le *_s390x *_universal2 *-musllinux_* cp314t*" CIBW_PROJECT_REQUIRES_PYTHON: ">=3.9" diff --git a/doc/narx.rst b/doc/narx.rst index db4d33d..dc379d2 100644 --- a/doc/narx.rst +++ b/doc/narx.rst @@ -46,10 +46,12 @@ The discontinuity can be caused by lasts for one hour at 1 Hz, and another measurement is carried out at Tuesday and lasts for two hours at 1 Hz #. Missing values -No matter how the discontinuity is caused, :class:`NARX` treats the discontinuous time series as multiple pieces of continuous time series in the same way. +As a result, there are two ways to inform :class:`NARX` the training data from different measurement sessions. +From version 0.5, the difference measurement sessions can be set by `session_sizes` in :meth:`NARX.fit` and :func:`make_narx`. +Alternatively, different measurement sessions can also be notified by inserting `np.nan` to break the data. +The following examples show how to notify :class:`NARX` the training data from different measurement sessions. -.. note:: - It is easy to notify :class:`NARX` that the time series data are from different measurement sessions by inserting np.nan to break the data. For example, +.. code-block:: python >>> import numpy as np >>> x0 = np.zeros((3, 2)) # First measurement session @@ -57,9 +59,18 @@ No matter how the discontinuity is caused, :class:`NARX` treats the discontinuou >>> max_delay = 2 # Assume the maximum delay for NARX model is 2 >>> # Insert (at least max_delay number of) np.nan to break the two measurement sessions >>> u = np.r_[x0, [[np.nan, np.nan]]*max_delay, x1] + >>> # Or use session_sizes to break the two measurement sessions + >>> session_sizes = [3, 5] - It is important to break the different measurement sessions by np.nan, because otherwise, - the model will assume the time interval between the two measurement sessions is the same as the time interval within a session. +.. note:: + It is important to separate different measurement sessions using either method. + Otherwise, the model will incorrectly assume that the time interval between two measurement sessions + is the same as the sampling interval within a session. + + Actually, the two methods will result in the exact same models when `X` is not None or multiple-step-ahead prediction is not used for training. + If `X` is None, i.e. Auto-Regression models, and multiple-step-ahead prediction is used for training, only `session_sizes` can + separate the two measurement sessions. + Because when using multiple-step-ahead prediction, the optimiser relies on the missing values in inputs `X` rather than output `y`, while Auto-Regression models do not have input `X`. One-step-ahead and multiple-step-ahead prediction diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst index 68c9d73..1f64ed1 100644 --- a/doc/unsupervised.rst +++ b/doc/unsupervised.rst @@ -17,6 +17,8 @@ features, which maximize the sum of the squared canonical correlation (SSC) with the principal components (PCs) acquired from PCA of the feature matrix :math:`X` [1]_. See the example below. +.. code-block:: python + >>> from sklearn.decomposition import PCA >>> from sklearn import datasets >>> from fastcan import FastCan diff --git a/fastcan/narx/_base.py b/fastcan/narx/_base.py index f32abe1..71a7e14 100644 --- a/fastcan/narx/_base.py +++ b/fastcan/narx/_base.py @@ -162,11 +162,14 @@ def __init__( { "X": [None, "array-like"], "coef_init": [None, StrOptions({"one_step_ahead"}), "array-like"], - "sample_weight": ["array-like", None], + "sample_weight": [None, "array-like"], + "session_sizes": [None, "array-like"], }, prefer_skip_nested_validation=True, ) - def fit(self, X, y, sample_weight=None, coef_init=None, **params): + def fit( + self, X, y, sample_weight=None, coef_init=None, session_sizes=None, **params + ): """ Fit narx model. @@ -194,6 +197,13 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): When coef_init is `one_step_ahead`, the model will be trained as a Multi-Step-Ahead NARX, rather than a One-Step-Ahead NARX. + session_sizes : array-like of shape (n_sessions,), default=None + The sizes of measurement sessions for time-series. + The sum of session_sizes should be equal to n_samples. + If None, the whole data is treated as one session. + + .. versionadded:: 0.5 + **params : dict Keyword arguments passed to `scipy.optimize.least_squares`. @@ -220,9 +230,6 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): X = np.empty((len(y), 0), dtype=float, order="C") # Create 0 feature input else: check_consistent_length(X, y) - sample_weight = _check_sample_weight( - sample_weight, X, dtype=X.dtype, ensure_non_negative=True - ) # store the number of dimension of the target to predict an array of # similar shape at predict self._y_ndim = y.ndim @@ -231,6 +238,12 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): self.n_outputs_ = y.shape[1] n_samples, n_features = X.shape + sample_weight = _check_sample_weight( + sample_weight, X, dtype=X.dtype, ensure_non_negative=True + ) + + session_sizes_cumsum = _validate_session_sizes(session_sizes, n_samples) + if self.feat_ids is None: if n_features == 0: feat_ids_ = make_poly_ids(self.n_outputs_, 1) - 1 @@ -295,8 +308,13 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_) xy_hstack = np.c_[X, y] osa_narx = LinearRegression(fit_intercept=self.fit_intercept) - time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids) - poly_terms = make_poly_features(time_shift_vars, poly_ids) + poly_terms = _prepare_poly_terms( + xy_hstack, + time_shift_ids, + poly_ids, + session_sizes_cumsum, + self.max_delay_, + ) # Remove missing values poly_terms_masked, y_masked, sample_weight_masked = mask_missing_values( poly_terms, y, sample_weight @@ -359,6 +377,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params): self.output_ids_, self.fit_intercept, sample_weight_sqrt, + session_sizes_cumsum, grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -422,6 +441,7 @@ def _loss( output_ids, fit_intercept, sample_weight_sqrt, + session_sizes_cumsum, *args, ): # Sum of squared errors @@ -442,6 +462,7 @@ def _loss( feat_ids, delay_ids, output_ids, + session_sizes_cumsum, y_hat, ) @@ -461,6 +482,7 @@ def _grad( output_ids, fit_intercept, sample_weight_sqrt, + session_sizes_cumsum, grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -484,6 +506,7 @@ def _grad( feat_ids, delay_ids, output_ids, + session_sizes_cumsum, y_hat, ) @@ -510,6 +533,7 @@ def _grad( grad_delay_ids, grad_coef_ids, grad_feat_ids, + session_sizes_cumsum, dydx, dcf, ) @@ -587,7 +611,9 @@ def predict(self, X, y_init=None): f"`y_init` should have {self.n_outputs_} outputs " f"but got {y_init.shape[1]}." ) - y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=float) + n_samples = X.shape[0] + y_hat = np.zeros((n_samples, self.n_outputs_), dtype=float) + session_sizes_cumsum = np.array([n_samples], dtype=np.int32) _predict( X, y_init, @@ -596,6 +622,7 @@ def predict(self, X, y_init=None): self.feat_ids_, self.delay_ids_, self.output_ids_, + session_sizes_cumsum, y_hat, ) if self._y_ndim == 1: @@ -606,3 +633,34 @@ def __sklearn_tags__(self): tags = super().__sklearn_tags__() tags.input_tags.allow_nan = True return tags + + +def _validate_session_sizes(session_sizes, n_samples): + if session_sizes is None: + return np.array([n_samples], dtype=np.int32) + session_sizes = column_or_1d( + session_sizes, + dtype=np.int32, + warn=True, + ) + if (session_sizes <= 0).any(): + raise ValueError( + "All elements of session_sizes should be positive, " + f"but got {session_sizes}." + ) + if session_sizes.sum() != n_samples: + raise ValueError( + "The sum of session_sizes should be equal to n_samples, " + f"but got {session_sizes.sum()} != {n_samples}." + ) + return np.cumsum(session_sizes, dtype=np.int32) + + +def _prepare_poly_terms( + xy_hstack, time_shift_ids, poly_ids, session_sizes_cumsum, max_delay +): + time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids) + for start in session_sizes_cumsum[:-1]: + time_shift_vars[start : start + max_delay] = np.nan + poly_terms = make_poly_features(time_shift_vars, poly_ids) + return poly_terms diff --git a/fastcan/narx/_narx_fast.pyx b/fastcan/narx/_narx_fast.pyx index a1e72e1..4b49187 100644 --- a/fastcan/narx/_narx_fast.pyx +++ b/fastcan/narx/_narx_fast.pyx @@ -122,14 +122,15 @@ cdef inline void _update_dcf( @final cpdef void _predict( - const double[:, ::1] X, # IN - const double[:, ::1] y_ref, # IN - const double[::1] coef, # IN - const double[::1] intercept, # IN - const int[:, ::1] feat_ids, # IN - const int[:, ::1] delay_ids, # IN - const int[::1] output_ids, # IN - double[:, ::1] y_hat, # OUT + const double[:, ::1] X, # IN + const double[:, ::1] y_ref, # IN + const double[::1] coef, # IN + const double[::1] intercept, # IN + const int[:, ::1] feat_ids, # IN + const int[:, ::1] delay_ids, # IN + const int[::1] output_ids, # IN + const int[::1] session_sizes_cumsum, # IN + double[:, ::1] y_hat, # OUT ) noexcept nogil: """ Vectorized (Cython) variant of Python NARX._predict. @@ -142,11 +143,15 @@ cpdef void _predict( cdef: Py_ssize_t n_samples = X.shape[0] Py_ssize_t n_ref = y_ref.shape[0] - Py_ssize_t k + Py_ssize_t k, s = 0 Py_ssize_t init_k = 0 bint at_init = True for k in range(n_samples): + if k == session_sizes_cumsum[s]: + s += 1 + at_init = True + init_k = k with gil: if not np.all(np.isfinite(X[k])): at_init = True @@ -192,8 +197,9 @@ cpdef void _update_dydx( const int[:, ::1] grad_delay_ids, const int[::1] grad_coef_ids, const int[:, ::1] grad_feat_ids, - double[:, :, ::1] dydx, # OUT - double[:, :, ::1] dcf, # OUT + const int[::1] session_sizes_cumsum, + double[:, :, ::1] dydx, # OUT + double[:, :, ::1] dcf, # OUT ) noexcept nogil: """ Computation of the Jacobian matrix dydx. @@ -211,7 +217,7 @@ cpdef void _update_dydx( cdef: Py_ssize_t n_samples = y_hat.shape[0] Py_ssize_t n_coefs = feat_ids.shape[0] - Py_ssize_t k, i, d + Py_ssize_t k, i, d, s = 0 Py_ssize_t M = dcf.shape[1] # n_outputs Py_ssize_t N = dydx.shape[2] # n_x Py_ssize_t init_k = 0 @@ -224,6 +230,11 @@ cpdef void _update_dydx( terms_intercepts[i] = 1.0 for k in range(n_samples): + if k == session_sizes_cumsum[s]: + s += 1 + at_init = True + init_k = k + continue with gil: is_finite = np.all(np.isfinite(X[k])) and np.all(np.isfinite(y_hat[k])) if not is_finite: diff --git a/fastcan/narx/_utils.py b/fastcan/narx/_utils.py index 96def3b..9f86ba9 100644 --- a/fastcan/narx/_utils.py +++ b/fastcan/narx/_utils.py @@ -18,11 +18,9 @@ from .._fastcan import FastCan from .._refine import refine from ..utils import mask_missing_values -from ._base import NARX +from ._base import NARX, _prepare_poly_terms, _validate_session_sizes from ._feature import ( - make_poly_features, make_poly_ids, - make_time_shift_features, make_time_shift_ids, tp2fd, ) @@ -136,6 +134,7 @@ def _get_term_str(term_feat_ids, term_delay_ids): Interval(Integral, 1, None, closed="left"), ], "fit_intercept": ["boolean"], + "session_sizes": [None, "array-like"], "max_candidates": [None, Interval(Integral, 1, None, closed="left")], "random_state": ["random_state"], "include_zero_delay": [None, "array-like"], @@ -161,6 +160,7 @@ def make_narx( poly_degree=1, *, fit_intercept=True, + session_sizes=None, max_candidates=None, random_state=None, include_zero_delay=None, @@ -194,6 +194,13 @@ def make_narx( fit_intercept : bool, default=True Whether to fit the intercept. If set to False, intercept will be zeros. + session_sizes : array-like of shape (n_sessions,), default=None + The sizes of measurement sessions for time-series. + The sum of session_sizes should be equal to n_samples. + If None, the whole data is treated as one session. + + .. versionadded:: 0.5 + max_candidates : int, default=None Maximum number of candidate polynomial terms retained before selection. Randomly selected by reservoir sampling. @@ -284,7 +291,7 @@ def make_narx( check_consistent_length(X, y) if y.ndim == 1: y = y.reshape(-1, 1) - n_outputs = y.shape[1] + n_samples, n_outputs = y.shape if isinstance(n_terms_to_select, Integral): n_terms_to_select = np.full(n_outputs, n_terms_to_select, dtype=int) else: @@ -295,6 +302,7 @@ def make_narx( f"the number of outputs, {n_outputs}, but got " f"{len(n_terms_to_select)}." ) + session_sizes_cumsum = _validate_session_sizes(session_sizes, n_samples) xy_hstack = np.c_[X, y] n_features = X.shape[1] @@ -318,16 +326,20 @@ def make_narx( ), 0, ) - time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids_all) - poly_ids_all = make_poly_ids( time_shift_ids_all.shape[0], poly_degree, max_poly=max_candidates, random_state=random_state, ) - poly_terms = make_poly_features(time_shift_vars, poly_ids_all) + poly_terms = _prepare_poly_terms( + xy_hstack, + time_shift_ids_all, + poly_ids_all, + session_sizes_cumsum, + max_delay, + ) # Remove missing values poly_terms_masked, y_masked = mask_missing_values(poly_terms, y) diff --git a/fastcan/narx/tests/test_narx.py b/fastcan/narx/tests/test_narx.py index ef3aa2a..5547c9e 100644 --- a/fastcan/narx/tests/test_narx.py +++ b/fastcan/narx/tests/test_narx.py @@ -727,3 +727,88 @@ def test_predict_ndim(): model.fit(X, y.reshape(-1, 1)) y_hat = model.predict(X, y_init=y[: model.max_delay_]) assert y_hat.ndim == 2 + + +def test_session_sizes(): + """Test that session sizes in make_narx and NARX.fit and error messages.""" + X = np.random.rand(100, 2) + y = np.random.rand(100, 1) + with pytest.raises(ValueError, match=r"The sum of session_sizes should be.*"): + model = make_narx( + X, + y, + n_terms_to_select=5, + max_delay=3, + poly_degree=2, + verbose=0, + session_sizes=[10, 20], + ) + with pytest.raises( + ValueError, match=r"All elements of session_sizes should be positive.*" + ): + model = make_narx( + X, + y, + n_terms_to_select=5, + max_delay=3, + poly_degree=2, + verbose=0, + ) + model.fit(X, y, session_sizes=[-10, 110]) + model = make_narx( + X, + y, + n_terms_to_select=5, + max_delay=3, + poly_degree=2, + verbose=0, + ) + session_sizes = [20, 30, 50] + session_sizes_cumsum = np.cumsum(session_sizes) + model.fit(X, y, session_sizes=session_sizes) + coef_session = model.coef_ + X_missing = X[: session_sizes_cumsum[0]] + y_missing = y[: session_sizes_cumsum[0]] + for i in range(len(session_sizes) - 1): + X_missing = np.r_[ + X_missing, + [[np.nan, np.nan]] * model.max_delay_, + X[session_sizes_cumsum[i] : session_sizes_cumsum[i + 1]], + ] + y_missing = np.r_[ + y_missing, + [[np.nan]] * model.max_delay_, + y[session_sizes_cumsum[i] : session_sizes_cumsum[i + 1]], + ] + coef_missing = model.fit(X_missing, y_missing).coef_ + # Exactly same for one-step-ahead fitting + assert np.sum(np.abs(coef_session - coef_missing)) == 0 + coef_session_msa = model.fit( + X, y, session_sizes=session_sizes, coef_init="one_step_ahead" + ).coef_ + coef_missing_msa = model.fit(X_missing, y_missing, coef_init="one_step_ahead").coef_ + # Exactly same for multi-step-ahead fitting + assert np.sum(np.abs(coef_session_msa - coef_missing_msa)) == 0 + + # Auto-regression with session sizes + arm = make_narx( + X=None, + y=y, + n_terms_to_select=5, + max_delay=3, + poly_degree=2, + verbose=0, + ) + arm.fit(X=None, y=y, session_sizes=session_sizes) + ar_coef_session = arm.coef_ + arm.fit(X=None, y=y_missing) + ar_coef_missing = arm.coef_ + # Exactly same for one-step-ahead fitting + assert np.sum(np.abs(ar_coef_session - ar_coef_missing)) == 0 + + arm.fit(X=None, y=y, session_sizes=session_sizes, coef_init="one_step_ahead") + ar_coef_session_msa = arm.coef_ + arm.fit(X=None, y=y_missing, coef_init="one_step_ahead") + ar_coef_missing_msa = arm.coef_ + # Not same for multi-step-ahead fitting + assert np.sum(np.abs(ar_coef_session_msa - ar_coef_missing_msa)) != 0 diff --git a/fastcan/narx/tests/test_narx_jac.py b/fastcan/narx/tests/test_narx_jac.py index 87bc873..2c2ca1e 100644 --- a/fastcan/narx/tests/test_narx_jac.py +++ b/fastcan/narx/tests/test_narx_jac.py @@ -36,6 +36,7 @@ def test_simple(): delay_ids=delay_ids, output_ids=output_ids, y_hat=y_hat, + session_sizes_cumsum=np.array([len(y)], dtype=np.int32), ) assert_array_equal(y_hat, y) @@ -53,6 +54,7 @@ def test_simple(): delay_ids=delay_ids, output_ids=output_ids, y_hat=y_hat_1, + session_sizes_cumsum=np.array([len(y)], dtype=np.int32), ) grad_truth = np.array( @@ -79,6 +81,7 @@ def test_simple(): output_ids, True, np.sqrt(sample_weight), + np.array([len(y)], dtype=np.int32), grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -95,6 +98,7 @@ def test_simple(): output_ids, True, np.sqrt(sample_weight), + np.array([len(y)], dtype=np.int32), grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -109,6 +113,7 @@ def test_simple(): output_ids, False, np.sqrt(sample_weight), + np.array([len(y)], dtype=np.int32), grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -209,6 +214,7 @@ def test_complex(): output_ids, True, np.sqrt(np.ones((y.shape[0], 1))), + np.array([len(y)], dtype=np.int32), grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -226,6 +232,7 @@ def test_complex(): delay_ids=delay_ids, output_ids=output_ids, y_hat=y_hat_0, + session_sizes_cumsum=np.array([len(y)], dtype=np.int32), ) e_0 = y_hat_0 - y @@ -250,6 +257,7 @@ def test_complex(): delay_ids=delay_ids, output_ids=output_ids, y_hat=y_hat_1, + session_sizes_cumsum=np.array([len(y)], dtype=np.int32), ) e_1 = y_hat_1 - y @@ -266,6 +274,7 @@ def test_complex(): output_ids, False, np.sqrt(np.ones((y.shape[0], 1))), + np.array([len(y)], dtype=np.int32), grad_yyd_ids, grad_delay_ids, grad_coef_ids, @@ -281,6 +290,7 @@ def test_complex(): delay_ids=delay_ids, output_ids=output_ids, y_hat=y_hat_0, + session_sizes_cumsum=np.array([len(y)], dtype=np.int32), ) e_0 = y_hat_0 - y @@ -298,6 +308,7 @@ def test_complex(): delay_ids=delay_ids, output_ids=output_ids, y_hat=y_hat_1, + session_sizes_cumsum=np.array([len(y)], dtype=np.int32), ) e_1 = y_hat_1 - y