scikit-learn-contrib · MatthewSZhang · Aug 1, 2025 · Aug 1, 2025 · Aug 1, 2025 · Aug 1, 2025
diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml
@@ -9,12 +9,12 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Build WASM wheel
-        uses: pypa/cibuildwheel@v3.0.1
+        uses: pypa/cibuildwheel@v3.1.2
         env:
           CIBW_PLATFORM: pyodide
       - name: Upload package
         uses: actions/upload-artifact@v4
         with:
           name: wasm_wheel
           path: ./wheelhouse/*_wasm32.whl
-          if-no-files-found: error
+          if-no-files-found: error
diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml
@@ -9,7 +9,7 @@ jobs:
 
     steps:
       - uses: actions/checkout@v4
-      - uses: prefix-dev/[email protected].11
+      - uses: prefix-dev/[email protected].14
         with:
           environments: static
           cache: true

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
 
     steps:
       - uses: actions/checkout@v4
-      - uses: prefix-dev/[email protected].11
+      - uses: prefix-dev/[email protected].14
         with:
           environments: >-
             dev

diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: prefix-dev/[email protected].11
+      - uses: prefix-dev/[email protected].14
         with:
           environments: dev
           cache: true
@@ -33,9 +33,9 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.0.1
+        uses: pypa/cibuildwheel@v3.1.2
         env:
-          CIBW_SKIP: "*_i686 *_ppc64le *_s390x *_universal2 *-musllinux_*"
+          CIBW_SKIP: "*_i686 *_ppc64le *_s390x *_universal2 *-musllinux_* cp314*"
           CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10"
           CIBW_ARCHS_LINUX: auto
           CIBW_ARCHS_MACOS: x86_64 arm64

diff --git a/README.rst b/README.rst
@@ -69,19 +69,27 @@ Getting Started
 ...     [-2.79, -0.02, -0.85 ],
 ...     [-1.34, -0.48, -2.55 ],
 ...     [ 1.92,  1.48,  0.65 ]]
->>> y = [[0, 0], [1, 1], [0, 0], [1, 0]] # Multioutput feature selection
->>> selector = FastCan(n_features_to_select=2, verbose=0).fit(X, y)
+>>> # Multioutput feature selection
+>>> y = [[0, 0], [1, 1], [0, 0], [1, 0]]
+>>> selector = FastCan(
+...     n_features_to_select=2, verbose=0
+... ).fit(X, y)
 >>> selector.get_support()
 array([ True,  True, False])
->>> selector.get_support(indices=True) # Sorted indices
+>>> # Sorted indices
+>>> selector.get_support(indices=True)
 array([0, 1])
->>> selector.indices_ # Indices in selection order
+>>> # Indices in selection order
+>>> selector.indices_
 array([1, 0], dtype=int32)
->>> selector.scores_ # Scores for selected features in selection order
+>>> # Scores for selected features in selection order
+>>> selector.scores_
 array([0.91162413, 0.71089547])
 >>> # Here Feature 2 must be included
->>> selector = FastCan(n_features_to_select=2, indices_include=[2], verbose=0).fit(X, y)
->>> # We can find the feature which is useful when working with Feature 2
+>>> selector = FastCan(
+...     n_features_to_select=2, indices_include=[2], verbose=0
+... ).fit(X, y)
+>>> # The feature which is useful when working with Feature 2
 >>> selector.indices_
 array([2, 0], dtype=int32)
 >>> selector.scores_
@@ -92,7 +100,7 @@ NARX Time Series Modelling
 --------------------------
 fastcan can be used for system identification.
 In particular, we provide a submodule `fastcan.narx` to build Nonlinear AutoRegressive eXogenous (NARX) models.
-For more information, check our `Home Page <https://fastcan.readthedocs.io/en/latest/>`_.
+For more information, check this `NARX model example <https://fastcan.readthedocs.io/en/latest/auto_examples/plot_narx.html>`_.
 
 
 Support Free-Threaded Wheels

diff --git a/doc/narx.rst b/doc/narx.rst
@@ -52,10 +52,11 @@ No matter how the discontinuity is caused, :class:`NARX` treats the discontinuou
     It is easy to notify :class:`NARX` that the time series data are from different measurement sessions by inserting np.nan to break the data. For example,
 
     >>> import numpy as np
-    >>> x0 = np.zeros((3, 2)) # First measurement session has 3 samples with 2 features
-    >>> x1 = np.ones((5, 2)) # Second measurement session has 5 samples with 2 features
+    >>> x0 = np.zeros((3, 2)) # First measurement session
+    >>> x1 = np.ones((5, 2)) # Second measurement session
     >>> max_delay = 2 # Assume the maximum delay for NARX model is 2
-    >>> u = np.r_[x0, [[np.nan, np.nan]]*max_delay, x1] # Insert (at least max_delay number of) np.nan to break the two measurement sessions
+    >>> # Insert (at least max_delay number of) np.nan to break the two measurement sessions
+    >>> u = np.r_[x0, [[np.nan, np.nan]]*max_delay, x1]
 
     It is important to break the different measurement sessions by np.nan, because otherwise,
     the model will assume the time interval between the two measurement sessions is the same as the time interval within a session.

diff --git a/examples/plot_forecasting.py b/examples/plot_forecasting.py
@@ -0,0 +1,142 @@
+"""
+======================================
+Forecasting with (Nonlinear) AR models
+======================================
+
+.. currentmodule:: fastcan.narx
+
+In this examples, we will demonstrate how to use :func:`make_narx` to build (nonlinear)
+AutoRegressive (AR) models for time-series forecasting.
+The time series used isthe monthly average atmospheric CO2 concentrations
+from 1958 and 2001.
+The objective is to forecast the CO2 concentration till nowadays with
+initial 18 months data.
+
+.. rubric:: Credit
+
+* The majority of code is adapted from the scikit-learn tutorial
+  `Forecasting of CO2 level on Mona Loa dataset using Gaussian process regression (GPR)
+  <https://scikit-learn.org/stable/auto_examples/gaussian_process/plot_gpr_co2.html>`_.
+"""
+
+# Authors: The fastcan developers
+# SPDX-License-Identifier: MIT
+
+# %%
+# Prepare data
+# ------------
+# We use the data consists of the monthly average atmospheric CO2 concentrations
+# (in parts per million by volume (ppm)) collected at the Mauna Loa Observatory
+# in Hawaii, between 1958 and 2001.
+
+from sklearn.datasets import fetch_openml
+
+co2 = fetch_openml(data_id=41187, as_frame=True)
+co2 = co2.frame
+co2.head()
+
+# %%
+# First, we process the original dataframe to create a date column and select it along
+# with the CO2 column. Here, date columns is used only for plotting, as there is no
+# inputs (including time or date) required in AR models.
+
+import pandas as pd
+
+co2_data = co2[["year", "month", "day", "co2"]].assign(
+    date=lambda x: pd.to_datetime(x[["year", "month", "day"]])
+)[["date", "co2"]]
+co2_data.head()
+
+
+# %%
+# The CO2 concentration are from March, 1958 to December, 2001, which
+# is shown in the plot below.
+
+import matplotlib.pyplot as plt
+
+plt.plot(co2_data["date"], co2_data["co2"])
+plt.xlabel("date")
+plt.ylabel("CO$_2$ concentration (ppm)")
+_ = plt.title("Raw air samples measurements from the Mauna Loa Observatory")
+
+# %%
+# We will preprocess the dataset by taking a monthly average to smooth the data.
+# The months which have no measurements were collected should not be dropped.
+# Because AR models require the time intervals between the two neighboring measurements
+# are consistent.
+# As the results, the NaN values should be kept as the placeholders to maintain the
+# time intervals, and :class:`NARX` can handle the missing values properly.
+
+co2_data = co2_data.set_index("date").resample("ME")["co2"].mean().reset_index()
+plt.plot(co2_data["date"], co2_data["co2"])
+plt.xlabel("date")
+plt.ylabel("Monthly average of CO$_2$ concentration (ppm)")
+_ = plt.title(
+    "Monthly average of air samples measurements\nfrom the Mauna Loa Observatory"
+)
+
+# %%
+# For plotting, the time axis for training is from March, 1958 to December, 2001,
+# which is converted it into a numeric, e.g., March, 1958 will be converted to 1958.25.
+# The time axis for test is from March, 1958 to nowadays.
+
+import datetime
+
+import numpy as np
+
+today = datetime.datetime.now()
+current_month = today.year + today.month / 12
+
+x_train = (co2_data["date"].dt.year + co2_data["date"].dt.month / 12).to_numpy()
+x_test = np.arange(start=1958.25, stop=current_month, step=1 / 12)
+
+# %%
+# Nonlinear AR model
+# ------------------
+# We can use :func:`make_narx` to easily build a nonlinear AR model, which does not
+# has a input. Therefore, the input ``X`` is set as ``None``.
+# :func:`make_narx` will search 10 polynomial terms, whose maximum degree is 2 and
+# maximum delay is 9.
+
+from fastcan.narx import make_narx, print_narx
+
+max_delay = 9
+model = make_narx(
+    None,
+    co2_data["co2"],
+    n_terms_to_select=10,
+    max_delay=max_delay,
+    poly_degree=2,
+    verbose=0,
+)
+model.fit(None, co2_data["co2"], coef_init="one_step_ahead")
+print_narx(model, term_space=27)
+
+# %%
+# Forecasting performance
+# -----------------------
+# As AR model does not require input data, the input ``X`` in :func:`predict`
+# is used to indicate total steps to forecast. The initial conditions ``y_init``
+# is the first 18 months data from the ground truth, which contains missing values.
+# If there is no missing value given to ``y_init``, we can only use ``max_delay``
+# number of samples as the initial conditions.
+# However, if missing values are given to ``y_init``, :class:`NARX` will break
+# the data into multiple time series according to the missing values. For each
+# time series, at least ``max_delay`` number of samples, which does not have
+# missing values, are required to do the proper forecasting.
+# The results show our fitted model is capable to forecast to future years with
+# only first 18 months data.
+
+y_pred = model.predict(
+    len(x_test),
+    y_init=co2_data["co2"][:18],
+)
+plt.plot(x_test, y_pred, label="Predicted", c="tab:orange")
+plt.plot(x_train, co2_data["co2"], label="Actual", linestyle="dashed", c="tab:blue")
+plt.legend()
+plt.xlabel("Year")
+plt.ylabel("Monthly average of CO$_2$ concentration (ppm)")
+_ = plt.title(
+    "Monthly average of air samples measurements\nfrom the Mauna Loa Observatory"
+)
+plt.show()
diff --git a/examples/plot_narx_msa.py b/examples/plot_narx_msa.py
@@ -33,8 +33,11 @@
 
 def duffing_equation(y, t):
     """Non-autonomous system"""
+    # y1 is displacement and y2 is velocity
     y1, y2 = y
+    # u is sinusoidal input
     u = 2.5 * np.cos(2 * np.pi * t)
+    # dydt is derivative of y1 and y2
     dydt = [y2, -0.1 * y2 + y1 - 0.25 * y1**3 + u]
     return dydt
 
@@ -79,20 +82,26 @@ def auto_duffing_equation(y, t):
 # In the phase portraits, it is shown that the system has two stable equilibria.
 # We use one to generate training data and the other to generate test data.
 
+# 10 s duration with 0.01 Hz sampling time,
+# so 1000 samples in total for each measurement
 dur = 10
 n_samples = 1000
+t = np.linspace(0, dur, n_samples)
+# External excitation is the same for each measurement
+u = 2.5 * np.cos(2 * np.pi * t).reshape(-1, 1)
 
+# Small additional white noise
 rng = np.random.default_rng(12345)
-e_train = rng.normal(0, 0.0004, n_samples)
+e_train_0 = rng.normal(0, 0.0004, n_samples)
 e_test = rng.normal(0, 0.0004, n_samples)
-t = np.linspace(0, dur, n_samples)
 
+# Solve differential equation to get displacement as y
+# Initial condition at displacement 0.6 and velocity 0.8
 sol = odeint(duffing_equation, [0.6, 0.8], t)
-u_train = 2.5 * np.cos(2 * np.pi * t).reshape(-1, 1)
-y_train = sol[:, 0] + e_train
+y_train_0 = sol[:, 0] + e_train_0
 
+# Initial condition at displacement 0.6 and velocity -0.8
 sol = odeint(duffing_equation, [0.6, -0.8], t)
-u_test = 2.5 * np.cos(2 * np.pi * t).reshape(-1, 1)
 y_test = sol[:, 0] + e_test
 
 # %%
@@ -111,8 +120,8 @@ def auto_duffing_equation(y, t):
 max_delay = 3
 
 narx_model = make_narx(
-    X=u_train,
-    y=y_train,
+    X=u,
+    y=y_train_0,
     n_terms_to_select=5,
     max_delay=max_delay,
     poly_degree=3,
@@ -129,17 +138,19 @@ def plot_prediction(ax, t, y_true, y_pred, title):
     ax.set_ylabel("y(t)")
 
 
-narx_model.fit(u_train, y_train)
-y_train_osa_pred = narx_model.predict(u_train, y_init=y_train[:max_delay])
-y_test_osa_pred = narx_model.predict(u_test, y_init=y_test[:max_delay])
+# OSA NARX
+narx_model.fit(u, y_train_0)
+y_train_0_osa_pred = narx_model.predict(u, y_init=y_train_0[:max_delay])
+y_test_osa_pred = narx_model.predict(u, y_init=y_test[:max_delay])
 
-narx_model.fit(u_train, y_train, coef_init="one_step_ahead")
-y_train_msa_pred = narx_model.predict(u_train, y_init=y_train[:max_delay])
-y_test_msa_pred = narx_model.predict(u_test, y_init=y_test[:max_delay])
+# MSA NARX
+narx_model.fit(u, y_train_0, coef_init="one_step_ahead")
+y_train_0_msa_pred = narx_model.predict(u, y_init=y_train_0[:max_delay])
+y_test_msa_pred = narx_model.predict(u, y_init=y_test[:max_delay])
 
 fig, ax = plt.subplots(2, 2, figsize=(8, 6))
-plot_prediction(ax[0, 0], t, y_train, y_train_osa_pred, "OSA NARX on Train")
-plot_prediction(ax[0, 1], t, y_train, y_train_msa_pred, "MSA NARX on Train")
+plot_prediction(ax[0, 0], t, y_train_0, y_train_0_osa_pred, "OSA NARX on Train 0")
+plot_prediction(ax[0, 1], t, y_train_0, y_train_0_msa_pred, "MSA NARX on Train 0")
 plot_prediction(ax[1, 0], t, y_test, y_test_osa_pred, "OSA NARX on Test")
 plot_prediction(ax[1, 1], t, y_test, y_test_msa_pred, "MSA NARX on Test")
 fig.tight_layout()
@@ -152,14 +163,21 @@ def plot_prediction(ax, t, y_true, y_pred, title):
 #
 # The plot above shows that the NARX model cannot capture the dynamics at
 # the left equilibrium shown in the phase portraits. To improve the performance, let us
-# combine the training and test data for model training to include the dynamics of both
-# equilibria. Here, we need to insert (at least max_delay number of) `np.nan` to
-# indicate the model that training data
-# and test data are from different measurement sessions. The plot shows that the
+# append another measurement session to the training data to include the dynamics of
+# both equilibria. Here, we need to insert (at least max_delay number of) `np.nan` to
+# indicate the model that the original training data and the appended data
+# are from different measurement sessions. The plot shows that the
 # prediction performance of the NARX on test data has been largely improved.
 
-u_all = np.r_[u_train, [[np.nan]] * max_delay, u_test]
-y_all = np.r_[y_train, [np.nan] * max_delay, y_test]
+e_train_1 = rng.normal(0, 0.0004, n_samples)
+
+# Solve differential equation to get displacement as y
+# Initial condition at displacement 0.5 and velocity -1
+sol = odeint(duffing_equation, [0.5, -1], t)
+y_train_1 = sol[:, 0] + e_train_1
+
+u_all = np.r_[u, [[np.nan]] * max_delay, u]
+y_all = np.r_[y_train_0, [np.nan] * max_delay, y_train_1]
 narx_model = make_narx(
     X=u_all,
     y=y_all,
@@ -170,17 +188,21 @@ def plot_prediction(ax, t, y_true, y_pred, title):
 )
 
 narx_model.fit(u_all, y_all)
-y_train_osa_pred = narx_model.predict(u_train, y_init=y_train[:max_delay])
-y_test_osa_pred = narx_model.predict(u_test, y_init=y_test[:max_delay])
+y_train_0_osa_pred = narx_model.predict(u, y_init=y_train_0[:max_delay])
+y_train_1_osa_pred = narx_model.predict(u, y_init=y_train_1[:max_delay])
+y_test_osa_pred = narx_model.predict(u, y_init=y_test[:max_delay])
 
 narx_model.fit(u_all, y_all, coef_init="one_step_ahead")
-y_train_msa_pred = narx_model.predict(u_train, y_init=y_train[:max_delay])
-y_test_msa_pred = narx_model.predict(u_test, y_init=y_test[:max_delay])
-
-fig, ax = plt.subplots(2, 2, figsize=(8, 6))
-plot_prediction(ax[0, 0], t, y_train, y_train_osa_pred, "OSA NARX on Train")
-plot_prediction(ax[0, 1], t, y_train, y_train_msa_pred, "MSA NARX on Train")
-plot_prediction(ax[1, 0], t, y_test, y_test_osa_pred, "OSA NARX on Test")
-plot_prediction(ax[1, 1], t, y_test, y_test_msa_pred, "MSA NARX on Test")
+y_train_0_msa_pred = narx_model.predict(u, y_init=y_train_0[:max_delay])
+y_train_1_msa_pred = narx_model.predict(u, y_init=y_train_1[:max_delay])
+y_test_msa_pred = narx_model.predict(u, y_init=y_test[:max_delay])
+
+fig, ax = plt.subplots(3, 2, figsize=(8, 9))
+plot_prediction(ax[0, 0], t, y_train_0, y_train_0_osa_pred, "OSA NARX on Train 0")
+plot_prediction(ax[0, 1], t, y_train_0, y_train_0_msa_pred, "MSA NARX on Train 0")
+plot_prediction(ax[1, 0], t, y_train_1, y_train_1_osa_pred, "OSA NARX on Train 1")
+plot_prediction(ax[1, 1], t, y_train_1, y_train_1_msa_pred, "MSA NARX on Train 1")
+plot_prediction(ax[2, 0], t, y_test, y_test_osa_pred, "OSA NARX on Test")
+plot_prediction(ax[2, 1], t, y_test, y_test_msa_pred, "MSA NARX on Test")
 fig.tight_layout()
 plt.show()