scikit-learn-contrib · MatthewSZhang · May 30, 2025 · May 30, 2025 · May 30, 2025
diff --git a/doc/narx.rst b/doc/narx.rst
@@ -54,7 +54,8 @@ No matter how the discontinuity is caused, :class:`NARX` treats the discontinuou
     >>> import numpy as np
     >>> x0 = np.zeros((3, 2)) # First measurement session has 3 samples with 2 features
     >>> x1 = np.ones((5, 2)) # Second measurement session has 5 samples with 2 features
-    >>> u = np.r_[x0, [[np.nan, np.nan]], x1] # Insert np.nan to break the two measurement sessions
+    >>> max_delay = 2 # Assume the maximum delay for NARX model is 2
+    >>> u = np.r_[x0, [[np.nan, np.nan]]*max_delay, x1] # Insert (at least max_delay number of) np.nan to break the two measurement sessions
 
     It is important to break the different measurement sessions by np.nan, because otherwise,
     the model will assume the time interval between the two measurement sessions is the same as the time interval within a session.

diff --git a/examples/plot_narx.py b/examples/plot_narx.py
@@ -108,10 +108,14 @@
 # whose :math:`X` is the nonlinear terms and :math:`y` is the output signal.
 
 from fastcan import FastCan
+from fastcan.utils import mask_missing_values
+
+# Mask out missing values caused by time-shifting
+poly_terms_masked, y_masked = mask_missing_values(poly_terms, y)
 
 selector = FastCan(
     n_features_to_select=4,  # 4 terms should be selected
-).fit(poly_terms, y)
+).fit(poly_terms_masked, y_masked)
 
 support = selector.get_support()
 selected_poly_ids = poly_ids[support]

diff --git a/examples/plot_narx_msa.py b/examples/plot_narx_msa.py
@@ -83,8 +83,8 @@ def auto_duffing_equation(y, t):
 n_samples = 1000
 
 rng = np.random.default_rng(12345)
-e_train = rng.normal(0, 0.0002, n_samples)
-e_test = rng.normal(0, 0.0002, n_samples)
+e_train = rng.normal(0, 0.0004, n_samples)
+e_test = rng.normal(0, 0.0004, n_samples)
 t = np.linspace(0, dur, n_samples)
 
 sol = odeint(duffing_equation, [0.6, 0.8], t)
@@ -153,12 +153,13 @@ def plot_prediction(ax, t, y_true, y_pred, title):
 # The plot above shows that the NARX model cannot capture the dynamics at
 # the left equilibrium shown in the phase portraits. To improve the performance, let us
 # combine the training and test data for model training to include the dynamics of both
-# equilibria. Here, we need to insert `np.nan` to indicate the model that training data
+# equilibria. Here, we need to insert (at least max_delay number of) `np.nan` to
+# indicate the model that training data
 # and test data are from different measurement sessions. The plot shows that the
 # prediction performance of the NARX on test data has been largely improved.
 
-u_all = np.r_[u_train, [[np.nan]], u_test]
-y_all = np.r_[y_train, [np.nan], y_test]
+u_all = np.r_[u_train, [[np.nan]]*max_delay, u_test]
+y_all = np.r_[y_train, [np.nan]*max_delay, y_test]
 narx_model = make_narx(
     X=u_all,
     y=y_all,

diff --git a/fastcan/narx.py b/fastcan/narx.py
@@ -62,10 +62,10 @@ def make_time_shift_features(X, ids):
     >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
     >>> ids = [[0, 0], [0, 1], [1, 1]]
     >>> make_time_shift_features(X, ids)
-    array([[1., 1., 2.],
-           [3., 1., 2.],
-           [5., 3., 4.],
-           [7., 5., 6.]])
+    array([[ 1., nan, nan],
+           [ 3.,  1.,  2.],
+           [ 5.,  3.,  4.],
+           [ 7.,  5.,  6.]])
     """
     X = check_array(X, ensure_2d=True, dtype=float, ensure_all_finite="allow-nan")
     ids = check_array(ids, ensure_2d=True, dtype=int)
@@ -74,7 +74,7 @@ def make_time_shift_features(X, ids):
     out = np.zeros([n_samples, n_outputs])
     for i, id_temp in enumerate(ids):
         out[:, i] = np.r_[
-            np.full(id_temp[1], X[0, id_temp[0]]),
+            np.full(id_temp[1], np.nan),
             X[: -id_temp[1] or None, id_temp[0]],
         ]
 
@@ -1413,11 +1413,11 @@ def make_narx(
     >>> print_narx(narx)
     | yid |        Term        |   Coef   |
     =======================================
-    |  0  |     Intercept      |  1.054   |
-    |  0  |    y_hat[k-1,0]    |  0.483   |
-    |  0  |   X[k,0]*X[k,0]    |  0.307   |
-    |  0  | X[k-1,0]*X[k-3,0]  |  1.999   |
-    |  0  |  X[k-2,0]*X[k,1]   |  1.527   |
+    |  0  |     Intercept      |  1.050   |
+    |  0  |    y_hat[k-1,0]    |  0.484   |
+    |  0  |   X[k,0]*X[k,0]    |  0.306   |
+    |  0  | X[k-1,0]*X[k-3,0]  |  2.000   |
+    |  0  |  X[k-2,0]*X[k,1]   |  1.528   |
     """
     X = check_array(X, dtype=float, ensure_2d=True, ensure_all_finite="allow-nan")
     y = check_array(y, dtype=float, ensure_2d=False, ensure_all_finite="allow-nan")

diff --git a/fastcan/utils.py b/fastcan/utils.py
@@ -168,4 +168,7 @@ def mask_missing_values(*arrays, return_mask=False):
     mask_valid = np.all(np.isfinite(np.c_[arrays]), axis=1)
     if return_mask:
         return mask_valid
+    masked_arrays = [_safe_indexing(x, mask_valid) for x in arrays]
+    if len(masked_arrays) == 1:
+        return masked_arrays[0]
     return [_safe_indexing(x, mask_valid) for x in arrays]
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -66,6 +66,10 @@ def test_mask_missing():
     b = rng.random((100, 2))
     c = rng.random(100)
     a[10, 0] = np.nan
+    a_masked = mask_missing_values(a)
+    mask_valid = mask_missing_values(a, return_mask=True)
+    assert a_masked.shape == (99, 10)
+    assert_array_equal(actual=a_masked, desired=a[mask_valid])
     b[20, 1] = np.nan
     c[30] = np.nan
     a_masked, b_masked, c_mask = mask_missing_values(a, b, c)