scikit-learn-contrib · MatthewSZhang · Nov 18, 2025 · Nov 18, 2025
diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml
@@ -9,7 +9,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - name: Build WASM wheel
-        uses: pypa/cibuildwheel@v3.2.1
+        uses: pypa/cibuildwheel@v3.3.0
         env:
           CIBW_PLATFORM: pyodide
       - name: Upload package

diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml
@@ -33,7 +33,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.2.1
+        uses: pypa/cibuildwheel@v3.3.0
         env:
           CIBW_SKIP: "*_i686 *_ppc64le *_s390x *_universal2 *-musllinux_* cp314t*"
           CIBW_PROJECT_REQUIRES_PYTHON: ">=3.9"

diff --git a/doc/narx.rst b/doc/narx.rst
@@ -46,20 +46,31 @@ The discontinuity can be caused by
    lasts for one hour at 1 Hz, and another measurement is carried out at Tuesday and lasts for two hours at 1 Hz
 #. Missing values
 
-No matter how the discontinuity is caused, :class:`NARX` treats the discontinuous time series as multiple pieces of continuous time series in the same way.
+As a result, there are two ways to inform :class:`NARX` the training data from different measurement sessions.
+From version 0.5, the difference measurement sessions can be set by `session_sizes` in :meth:`NARX.fit` and :func:`make_narx`.
+Alternatively, different measurement sessions can also be notified by inserting `np.nan` to break the data.
+The following examples show how to notify :class:`NARX` the training data from different measurement sessions.
 
-.. note::
-    It is easy to notify :class:`NARX` that the time series data are from different measurement sessions by inserting np.nan to break the data. For example,
+.. code-block:: python
 
     >>> import numpy as np
     >>> x0 = np.zeros((3, 2)) # First measurement session
     >>> x1 = np.ones((5, 2)) # Second measurement session
     >>> max_delay = 2 # Assume the maximum delay for NARX model is 2
     >>> # Insert (at least max_delay number of) np.nan to break the two measurement sessions
     >>> u = np.r_[x0, [[np.nan, np.nan]]*max_delay, x1]
+    >>> # Or use session_sizes to break the two measurement sessions
+    >>> session_sizes = [3, 5]
 
-    It is important to break the different measurement sessions by np.nan, because otherwise,
-    the model will assume the time interval between the two measurement sessions is the same as the time interval within a session.
+.. note::
+    It is important to separate different measurement sessions using either method.
+    Otherwise, the model will incorrectly assume that the time interval between two measurement sessions
+    is the same as the sampling interval within a session.
+
+    Actually, the two methods will result in the exact same models when `X` is not None or multiple-step-ahead prediction is not used for training.
+    If `X` is None, i.e. Auto-Regression models, and multiple-step-ahead prediction is used for training, only `session_sizes` can
+    separate the two measurement sessions.
+    Because when using multiple-step-ahead prediction, the optimiser relies on the missing values in inputs `X` rather than output `y`, while Auto-Regression models do not have input `X`.
 
 
 One-step-ahead and multiple-step-ahead prediction

diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst
@@ -17,6 +17,8 @@ features, which maximize the sum of the squared canonical correlation (SSC) with
 the principal components (PCs) acquired from PCA of the feature matrix :math:`X` [1]_.
 See the example below.
 
+.. code-block:: python
+
     >>> from sklearn.decomposition import PCA
     >>> from sklearn import datasets
     >>> from fastcan import FastCan

diff --git a/fastcan/narx/_base.py b/fastcan/narx/_base.py
@@ -162,11 +162,14 @@ def __init__(
         {
             "X": [None, "array-like"],
             "coef_init": [None, StrOptions({"one_step_ahead"}), "array-like"],
-            "sample_weight": ["array-like", None],
+            "sample_weight": [None, "array-like"],
+            "session_sizes": [None, "array-like"],
         },
         prefer_skip_nested_validation=True,
     )
-    def fit(self, X, y, sample_weight=None, coef_init=None, **params):
+    def fit(
+        self, X, y, sample_weight=None, coef_init=None, session_sizes=None, **params
+    ):
         """
         Fit narx model.
 
@@ -194,6 +197,13 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
                 When coef_init is `one_step_ahead`, the model will be trained as a
                 Multi-Step-Ahead NARX, rather than a One-Step-Ahead NARX.
 
+        session_sizes : array-like of shape (n_sessions,), default=None
+            The sizes of measurement sessions for time-series.
+            The sum of session_sizes should be equal to n_samples.
+            If None, the whole data is treated as one session.
+
+            .. versionadded:: 0.5
+
         **params : dict
             Keyword arguments passed to
             `scipy.optimize.least_squares`.
@@ -220,9 +230,6 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
             X = np.empty((len(y), 0), dtype=float, order="C")  # Create 0 feature input
         else:
             check_consistent_length(X, y)
-        sample_weight = _check_sample_weight(
-            sample_weight, X, dtype=X.dtype, ensure_non_negative=True
-        )
         # store the number of dimension of the target to predict an array of
         # similar shape at predict
         self._y_ndim = y.ndim
@@ -231,6 +238,12 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
         self.n_outputs_ = y.shape[1]
         n_samples, n_features = X.shape
 
+        sample_weight = _check_sample_weight(
+            sample_weight, X, dtype=X.dtype, ensure_non_negative=True
+        )
+
+        session_sizes_cumsum = _validate_session_sizes(session_sizes, n_samples)
+
         if self.feat_ids is None:
             if n_features == 0:
                 feat_ids_ = make_poly_ids(self.n_outputs_, 1) - 1
@@ -295,8 +308,13 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
             time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_)
             xy_hstack = np.c_[X, y]
             osa_narx = LinearRegression(fit_intercept=self.fit_intercept)
-            time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids)
-            poly_terms = make_poly_features(time_shift_vars, poly_ids)
+            poly_terms = _prepare_poly_terms(
+                xy_hstack,
+                time_shift_ids,
+                poly_ids,
+                session_sizes_cumsum,
+                self.max_delay_,
+            )
             # Remove missing values
             poly_terms_masked, y_masked, sample_weight_masked = mask_missing_values(
                 poly_terms, y, sample_weight
@@ -359,6 +377,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
                 self.output_ids_,
                 self.fit_intercept,
                 sample_weight_sqrt,
+                session_sizes_cumsum,
                 grad_yyd_ids,
                 grad_delay_ids,
                 grad_coef_ids,
@@ -422,6 +441,7 @@ def _loss(
         output_ids,
         fit_intercept,
         sample_weight_sqrt,
+        session_sizes_cumsum,
         *args,
     ):
         # Sum of squared errors
@@ -442,6 +462,7 @@ def _loss(
             feat_ids,
             delay_ids,
             output_ids,
+            session_sizes_cumsum,
             y_hat,
         )
 
@@ -461,6 +482,7 @@ def _grad(
         output_ids,
         fit_intercept,
         sample_weight_sqrt,
+        session_sizes_cumsum,
         grad_yyd_ids,
         grad_delay_ids,
         grad_coef_ids,
@@ -484,6 +506,7 @@ def _grad(
             feat_ids,
             delay_ids,
             output_ids,
+            session_sizes_cumsum,
             y_hat,
         )
 
@@ -510,6 +533,7 @@ def _grad(
             grad_delay_ids,
             grad_coef_ids,
             grad_feat_ids,
+            session_sizes_cumsum,
             dydx,
             dcf,
         )
@@ -587,7 +611,9 @@ def predict(self, X, y_init=None):
                     f"`y_init` should have {self.n_outputs_} outputs "
                     f"but got {y_init.shape[1]}."
                 )
-        y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=float)
+        n_samples = X.shape[0]
+        y_hat = np.zeros((n_samples, self.n_outputs_), dtype=float)
+        session_sizes_cumsum = np.array([n_samples], dtype=np.int32)
         _predict(
             X,
             y_init,
@@ -596,6 +622,7 @@ def predict(self, X, y_init=None):
             self.feat_ids_,
             self.delay_ids_,
             self.output_ids_,
+            session_sizes_cumsum,
             y_hat,
         )
         if self._y_ndim == 1:
@@ -606,3 +633,34 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
         return tags
+
+
+def _validate_session_sizes(session_sizes, n_samples):
+    if session_sizes is None:
+        return np.array([n_samples], dtype=np.int32)
+    session_sizes = column_or_1d(
+        session_sizes,
+        dtype=np.int32,
+        warn=True,
+    )
+    if (session_sizes <= 0).any():
+        raise ValueError(
+            "All elements of session_sizes should be positive, "
+            f"but got {session_sizes}."
+        )
+    if session_sizes.sum() != n_samples:
+        raise ValueError(
+            "The sum of session_sizes should be equal to n_samples, "
+            f"but got {session_sizes.sum()} != {n_samples}."
+        )
+    return np.cumsum(session_sizes, dtype=np.int32)
+
+
+def _prepare_poly_terms(
+    xy_hstack, time_shift_ids, poly_ids, session_sizes_cumsum, max_delay
+):
+    time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids)
+    for start in session_sizes_cumsum[:-1]:
+        time_shift_vars[start : start + max_delay] = np.nan
+    poly_terms = make_poly_features(time_shift_vars, poly_ids)
+    return poly_terms
diff --git a/fastcan/narx/_narx_fast.pyx b/fastcan/narx/_narx_fast.pyx
@@ -122,14 +122,15 @@ cdef inline void _update_dcf(
 
 @final
 cpdef void _predict(
-    const double[:, ::1] X,             # IN
-    const double[:, ::1] y_ref,         # IN
-    const double[::1] coef,             # IN
-    const double[::1] intercept,        # IN
-    const int[:, ::1] feat_ids,         # IN
-    const int[:, ::1] delay_ids,        # IN
-    const int[::1] output_ids,          # IN
-    double[:, ::1] y_hat,               # OUT
+    const double[:, ::1] X,                 # IN
+    const double[:, ::1] y_ref,             # IN
+    const double[::1] coef,                 # IN
+    const double[::1] intercept,            # IN
+    const int[:, ::1] feat_ids,             # IN
+    const int[:, ::1] delay_ids,            # IN
+    const int[::1] output_ids,              # IN
+    const int[::1] session_sizes_cumsum,    # IN
+    double[:, ::1] y_hat,                   # OUT
 ) noexcept nogil:
     """
     Vectorized (Cython) variant of Python NARX._predict.
@@ -142,11 +143,15 @@ cpdef void _predict(
     cdef:
         Py_ssize_t n_samples = X.shape[0]
         Py_ssize_t n_ref = y_ref.shape[0]
-        Py_ssize_t k
+        Py_ssize_t k, s = 0
         Py_ssize_t init_k = 0
         bint at_init = True
 
     for k in range(n_samples):
+        if k == session_sizes_cumsum[s]:
+            s += 1
+            at_init = True
+            init_k = k
         with gil:
             if not np.all(np.isfinite(X[k])):
                 at_init = True
@@ -192,8 +197,9 @@ cpdef void _update_dydx(
     const int[:, ::1] grad_delay_ids,
     const int[::1] grad_coef_ids,
     const int[:, ::1] grad_feat_ids,
-    double[:, :, ::1] dydx,       # OUT
-    double[:, :, ::1] dcf,        # OUT
+    const int[::1] session_sizes_cumsum,
+    double[:, :, ::1] dydx,                 # OUT
+    double[:, :, ::1] dcf,                  # OUT
 ) noexcept nogil:
     """
     Computation of the Jacobian matrix dydx.
@@ -211,7 +217,7 @@ cpdef void _update_dydx(
     cdef:
         Py_ssize_t n_samples = y_hat.shape[0]
         Py_ssize_t n_coefs = feat_ids.shape[0]
-        Py_ssize_t k, i, d
+        Py_ssize_t k, i, d, s = 0
         Py_ssize_t M = dcf.shape[1]      # n_outputs
         Py_ssize_t N = dydx.shape[2]     # n_x
         Py_ssize_t init_k = 0
@@ -224,6 +230,11 @@ cpdef void _update_dydx(
         terms_intercepts[i] = 1.0
 
     for k in range(n_samples):
+        if k == session_sizes_cumsum[s]:
+            s += 1
+            at_init = True
+            init_k = k
+            continue
         with gil:
             is_finite = np.all(np.isfinite(X[k])) and np.all(np.isfinite(y_hat[k]))
         if not is_finite:

diff --git a/fastcan/narx/_utils.py b/fastcan/narx/_utils.py
@@ -18,11 +18,9 @@
 from .._fastcan import FastCan
 from .._refine import refine
 from ..utils import mask_missing_values
-from ._base import NARX
+from ._base import NARX, _prepare_poly_terms, _validate_session_sizes
 from ._feature import (
-    make_poly_features,
     make_poly_ids,
-    make_time_shift_features,
     make_time_shift_ids,
     tp2fd,
 )
@@ -136,6 +134,7 @@ def _get_term_str(term_feat_ids, term_delay_ids):
             Interval(Integral, 1, None, closed="left"),
         ],
         "fit_intercept": ["boolean"],
+        "session_sizes": [None, "array-like"],
         "max_candidates": [None, Interval(Integral, 1, None, closed="left")],
         "random_state": ["random_state"],
         "include_zero_delay": [None, "array-like"],
@@ -161,6 +160,7 @@ def make_narx(
     poly_degree=1,
     *,
     fit_intercept=True,
+    session_sizes=None,
     max_candidates=None,
     random_state=None,
     include_zero_delay=None,
@@ -194,6 +194,13 @@ def make_narx(
     fit_intercept : bool, default=True
         Whether to fit the intercept. If set to False, intercept will be zeros.
 
+    session_sizes : array-like of shape (n_sessions,), default=None
+        The sizes of measurement sessions for time-series.
+        The sum of session_sizes should be equal to n_samples.
+        If None, the whole data is treated as one session.
+
+        .. versionadded:: 0.5
+
     max_candidates : int, default=None
         Maximum number of candidate polynomial terms retained before selection.
         Randomly selected by reservoir sampling.
@@ -284,7 +291,7 @@ def make_narx(
         check_consistent_length(X, y)
     if y.ndim == 1:
         y = y.reshape(-1, 1)
-    n_outputs = y.shape[1]
+    n_samples, n_outputs = y.shape
     if isinstance(n_terms_to_select, Integral):
         n_terms_to_select = np.full(n_outputs, n_terms_to_select, dtype=int)
     else:
@@ -295,6 +302,7 @@ def make_narx(
                 f"the number of outputs, {n_outputs}, but got "
                 f"{len(n_terms_to_select)}."
             )
+    session_sizes_cumsum = _validate_session_sizes(session_sizes, n_samples)
 
     xy_hstack = np.c_[X, y]
     n_features = X.shape[1]
@@ -318,16 +326,20 @@ def make_narx(
         ),
         0,
     )
-    time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids_all)
-
     poly_ids_all = make_poly_ids(
         time_shift_ids_all.shape[0],
         poly_degree,
         max_poly=max_candidates,
         random_state=random_state,
     )
-    poly_terms = make_poly_features(time_shift_vars, poly_ids_all)
 
+    poly_terms = _prepare_poly_terms(
+        xy_hstack,
+        time_shift_ids_all,
+        poly_ids_all,
+        session_sizes_cumsum,
+        max_delay,
+    )
     # Remove missing values
     poly_terms_masked, y_masked = mask_missing_values(poly_terms, y)