5 changes: 5 additions & 0 deletions .github/pull_request_template.md
@@ -0,0 +1,5 @@
## Checklist

- [ ] Used a personal fork to propose changes
- [ ] A reference to a related issue:
- [ ] A description of the changes
27 changes: 23 additions & 4 deletions .github/workflows/ci.yml
@@ -11,8 +11,29 @@ jobs:
call-test:
uses: ./.github/workflows/test.yml
secrets: inherit

build-sdist:
runs-on: ubuntu-latest
needs: call-test
steps:
- uses: actions/checkout@v4
- uses: prefix-dev/setup-pixi@v0.8.8
with:
environments: dev
cache: true
- name: Re-install local
run: |
pixi reinstall -e dev --frozen fastcan
- name: Build SDist
run: |
pixi run build-sdist
- name: Store artifacts
uses: actions/upload-artifact@v4
with:
name: cibw-sdist
path: dist/*.tar.gz

build:
build-wheels:
strategy:
fail-fast: false
matrix:
@@ -22,7 +43,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Build wheels
uses: pypa/cibuildwheel@v2.22.0
uses: pypa/cibuildwheel@v2.23.3
env:
CIBW_BUILD: cp3*-*
CIBW_SKIP: pp* *i686* *musllinux* *-macosx_universal2 *-manylinux_ppc64le *-manylinux_s390x
@@ -31,8 +52,6 @@
CIBW_ARCHS_MACOS: x86_64 arm64
CIBW_ARCHS_WINDOWS: auto64
CIBW_BEFORE_ALL_LINUX: yum install -y ninja-build python3-devel
CIBW_BEFORE_ALL_WINDOWS: choco install ninja
CIBW_BEFORE_ALL_MACOS: brew install ninja
# Needed on Windows CI to compile with Visual Studio compiler
# otherwise Meson detects a MINGW64 platform and use MINGW64
# toolchain
35 changes: 35 additions & 0 deletions .github/workflows/static.yml
@@ -0,0 +1,35 @@
name: Static

on:
workflow_call:

jobs:
static:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- uses: prefix-dev/setup-pixi@v0.8.8
with:
environments: static
cache: true

- name: Re-install local
run: |
pixi reinstall -e static --frozen fastcan
- name: Linter
run: |
pixi run lint
- name: Lint Cython
run: |
pixi run cython-lint
- name: Formatter
run: |
pixi run fmt
- name: Type check
run: |
pixi run type
- name: Spell check
run: |
pixi run spell
43 changes: 19 additions & 24 deletions .github/workflows/test.yml
@@ -8,32 +8,30 @@ on:
branches: ["*.X"]

jobs:
call-lint:
uses: ./.github/workflows/static.yml

test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v4
- uses: prefix-dev/setup-pixi@v0.8.3
- uses: prefix-dev/setup-pixi@v0.8.8
with:
environments: default
environments: dev
cache: true

- name: Re-install local
run: |
pixi run rebuild
# Needed on Windows CI to compile with Visual Studio compiler
# otherwise Meson detects a MINGW64 platform and use MINGW64
# toolchain
pixi reinstall -e dev --frozen fastcan -- -Csetup-args=--vsenv

- name: Lint with ruff
run: |
pixi run lint
- name: Lint with cython-lint
run: |
pixi run cython-lint
- name: Format with black
run: |
pixi run fmt
- name: Type check with mypy
run: |
pixi run type
- name: Test with pytest
run: |
pixi run test
@@ -42,19 +40,16 @@ jobs:
run: |
pixi run doc
CMD=doctest pixi run doc
- name: Test nogil
run: |
pixi run nogil-eta
- name: Test coverage
if: runner.os == 'Linux'
shell: bash
run: |
FMT=xml pixi run test-coverage
- name: Upload coverage reports to Codecov
if: runner.os == 'Linux'
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
- name: Build SDist
run: |
pixi run build-sdist
- name: Store artifacts
uses: actions/upload-artifact@v4
with:
name: cibw-sdist
path: dist/*.tar.gz
2 changes: 1 addition & 1 deletion doc/multioutput.rst
@@ -12,7 +12,7 @@ MIMO (Multi-Input Multi-Output) data. For classification, it can be used for
multilabel data. Actually, for multiclass classification, which has one output with
multiple categories, multioutput feature selection can also be useful. The multiclass
classification can be converted to multilabel classification by one-hot encoding
target ``y``. The cannonical correaltion coefficient between the features ``X`` and the
target ``y``. The canonical correlation coefficient between the features ``X`` and the
one-hot encoded target ``y`` has an equivalent relationship with Fisher's criterion in
LDA (Linear Discriminant Analysis) [1]_. Applying :class:`FastCan` to the converted
multioutput data may result in better accuracy in the following classification task
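The passage above describes turning a multiclass problem into a multioutput one before selection. Below is a minimal sketch of that recipe; it assumes the public :class:`FastCan` interface (``n_features_to_select`` and the fitted ``indices_`` attribute) behaves as in the project README, so treat those names as assumptions rather than confirmed usage:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder

from fastcan import FastCan

X, y = load_iris(return_X_y=True)

# One-hot encode the multiclass target so it becomes multioutput (multilabel).
y_onehot = OneHotEncoder(sparse_output=False).fit_transform(y.reshape(-1, 1))

# Select features against the multioutput target.
selector = FastCan(n_features_to_select=2).fit(X, y_onehot)
print(selector.indices_)  # indices of the selected features
```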
2 changes: 1 addition & 1 deletion doc/narx.rst
@@ -82,7 +82,7 @@ It should also be noted the different types of predictions in model training.
ARX and OE model
----------------

To better understant the two types of training, it is helpful to know two linear time series model structures,
To better understand the two types of training, it is helpful to know two linear time series model structures,
i.e., `ARX (AutoRegressive eXogenous) model <https://www.mathworks.com/help/ident/ref/arx.html>`_ and
`OE (output error) model <https://www.mathworks.com/help/ident/ref/oe.html>`_.

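The ARX/OE distinction above is easiest to see numerically. Here is a plain-NumPy sketch (toy system and coefficients made up, no fastcan API involved): one-step-ahead prediction feeds back *measured* outputs, as ARX training does, while free-run simulation feeds back the model's *own* predictions, as OE training assumes:

```python
import numpy as np

rng = np.random.default_rng(0)
n = 200
u = rng.normal(size=n)

# Simulate a first-order system: y[k] = 0.8*y[k-1] + 0.5*u[k-1] + noise.
y = np.zeros(n)
for k in range(1, n):
    y[k] = 0.8 * y[k - 1] + 0.5 * u[k - 1] + 0.01 * rng.normal()

# Fit ARX coefficients by least squares on measured regressors.
phi = np.column_stack([y[:-1], u[:-1]])
theta, *_ = np.linalg.lstsq(phi, y[1:], rcond=None)

# One-step-ahead: past *measured* y enters the regressor (ARX-style).
y_osa = phi @ theta

# Free-run: past *predicted* y enters the regressor (OE-style).
y_sim = np.zeros(n)
y_sim[0] = y[0]
for k in range(1, n):
    y_sim[k] = theta[0] * y_sim[k - 1] + theta[1] * u[k - 1]

print(np.abs(y[1:] - y_osa).mean(), np.abs(y[1:] - y_sim[1:]).mean())
```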
2 changes: 1 addition & 1 deletion doc/ols_and_omp.rst
@@ -12,7 +12,7 @@ The detailed difference between OLS and OMP can be found in [3]_.
Here, let's briefly compare the three methods.


Assume we have a feature matrix :math:`X_s \in \mathbb{R}^{N\times t}`, which constains
Assume we have a feature matrix :math:`X_s \in \mathbb{R}^{N\times t}`, which contains
:math:`t` selected features, and a target vector :math:`y \in \mathbb{R}^{N\times 1}`.
Then the residual :math:`r \in \mathbb{R}^{N\times 1}` of the least-squares can be
found by
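The least-squares residual :math:`r` defined above can be computed directly; a small NumPy sketch with made-up data (:math:`N = 100`, :math:`t = 3`):

```python
import numpy as np

rng = np.random.default_rng(0)
N, t = 100, 3
X_s = rng.normal(size=(N, t))   # t selected features
y = rng.normal(size=(N, 1))     # target vector

# Least-squares coefficients and the residual r = y - X_s @ beta.
beta, *_ = np.linalg.lstsq(X_s, y, rcond=None)
r = y - X_s @ beta

# The residual is orthogonal to the span of the selected features.
print(np.allclose(X_s.T @ r, 0, atol=1e-10))
```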
4 changes: 2 additions & 2 deletions doc/pruning.rst
@@ -22,9 +22,9 @@ should be selected, as any additional samples can be represented by linear combi
Therefore, the number to select has to be kept small.

To solve this problem, we use :func:`minibatch` to loosen the redundancy check of :class:`FastCan`.
The original :class:`FastCan` checks the redunancy within :math:`X_s \in \mathbb{R}^{n\times t}`,
The original :class:`FastCan` checks the redundancy within :math:`X_s \in \mathbb{R}^{n\times t}`,
which contains :math:`t` selected samples and :math:`n` features,
and the redunancy within :math:`Y \in \mathbb{R}^{n\times m}`, which contains :math:`m` atoms :math:`y_i`.
and the redundancy within :math:`Y \in \mathbb{R}^{n\times m}`, which contains :math:`m` atoms :math:`y_i`.
:func:`minibatch` ranks samples with multiple correlation coefficients between :math:`X_b \in \mathbb{R}^{n\times b}` and :math:`y_i`,
where :math:`b` is the batch size and :math:`b \le t`, instead of canonical correlation coefficients between :math:`X_s` and :math:`Y`,
which is used in :class:`FastCan`.
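To illustrate the ranking step described above, here is a NumPy sketch of the idea only, not the actual :func:`minibatch` implementation or signature: candidate samples are the columns of :math:`X`, and one batch is grown greedily by the multiple correlation coefficient with a single atom :math:`y_i`:

```python
import numpy as np

def multiple_corr_r2(X_b, y_i):
    """R^2 from regressing atom y_i on the batch columns in X_b."""
    beta, *_ = np.linalg.lstsq(X_b, y_i, rcond=None)
    resid = y_i - X_b @ beta
    y_c = y_i - y_i.mean()
    return 1.0 - (resid @ resid) / (y_c @ y_c)

rng = np.random.default_rng(0)
n, n_candidates, b = 20, 50, 5            # features, candidate samples, batch size
X = rng.normal(size=(n, n_candidates))    # columns are candidate samples
y_i = rng.normal(size=n)                  # one atom of Y

batch = []                                # greedily grown batch X_b
for _ in range(b):
    rest = [j for j in range(n_candidates) if j not in batch]
    scores = [multiple_corr_r2(X[:, batch + [j]], y_i) for j in rest]
    batch.append(rest[int(np.argmax(scores))])
print(batch)
```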
2 changes: 1 addition & 1 deletion examples/plot_fisher.py
@@ -5,7 +5,7 @@
.. currentmodule:: fastcan
In this examples, we will demonstrate the cannonical correaltion coefficient
In this example, we will demonstrate that the canonical correlation coefficient
between the features ``X`` and the one-hot encoded target ``y`` has an equivalent
relationship with Fisher's criterion in LDA (Linear Discriminant Analysis).
"""
10 changes: 5 additions & 5 deletions examples/plot_intuitive.py
@@ -22,10 +22,10 @@
# the predicted target by a linear regression model) and the target to describe its
# usefulness, the results are shown in the following figure. It can be seen that
# Feature 2 is the most useful and Feature 8 is the second. However, does that mean
# that the total usefullness of Feature 2 + Feature 8 is the sum of their R-squared
# that the total usefulness of Feature 2 + Feature 8 is the sum of their R-squared
# scores? Probably not, because there may be redundancy between Feature 2 and Feature 8.
# Actually, what we want is a kind of usefulness score which has the **superposition**
# property, so that the usefullness of each feature can be added together without
# property, so that the usefulness of each feature can be added together without
# redundancy.

import matplotlib.pyplot as plt
@@ -125,7 +125,7 @@ def plot_bars(ids, r2_left, r2_selected):
# Select the third feature
# ------------------------
# Again, let's compute the R-squared between Feature 2 + Feature 8 + Feature i and
# the target, and the additonal R-squared contributed by the rest of the features is
# the target, and the additional R-squared contributed by the rest of the features is
# shown in following figure. It can be found that after selecting Features 2 and 8, the
# rest of the features can provide a very limited contribution.

@@ -145,8 +145,8 @@ def plot_bars(ids, r2_left, r2_selected):
# at the RHS of the dashed lines. The fast computational speed is achieved by
# orthogonalization, which removes the redundancy between the features. We use the
# orthogonalization first to make the rest of the features orthogonal to the selected
# features and then compute their additonal R-squared values. ``eta-cosine`` uses
# the samilar idea, but has an additonal preprocessing step to compress the features
# features and then compute their additional R-squared values. ``eta-cosine`` uses
# the similar idea, but has an additional preprocessing step to compress the features
# :math:`X \in \mathbb{R}^{N\times n}` and the target
# :math:`Y \in \mathbb{R}^{N\times m}` to :math:`X_c \in \mathbb{R}^{(m+n)\times n}`
# and :math:`Y_c \in \mathbb{R}^{(m+n)\times m}`.
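A compact NumPy sketch of the orthogonalization idea in this excerpt (illustrative only, not the fastcan internals): once the remaining features are projected onto the orthogonal complement of the selected ones, each candidate's additional R-squared can be read off independently and added without redundancy:

```python
import numpy as np

rng = np.random.default_rng(0)
N = 500
X = rng.normal(size=(N, 5))
y = X[:, 0] + 0.5 * X[:, 1] + 0.1 * rng.normal(size=N)

# Pretend Feature 0 is already selected; include a constant for centering.
S = np.column_stack([np.ones(N), X[:, 0]])
Q, _ = np.linalg.qr(S)                    # orthonormal basis of selected subspace

# Orthogonalize the remaining features against the selected subspace.
X_rest = X[:, 1:] - Q @ (Q.T @ X[:, 1:])

# Additional R^2 of each remaining feature, redundancy removed: the extra sum
# of squares explained, divided by the total sum of squares of y.
ss_total = ((y - y.mean()) ** 2).sum()
extra_r2 = (X_rest.T @ y) ** 2 / (X_rest ** 2).sum(axis=0) / ss_total
print(np.round(extra_r2, 3))
```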
11 changes: 7 additions & 4 deletions examples/plot_narx.py
@@ -125,11 +125,14 @@
# In the printed NARX model, it is found that :class:`FastCan` selects the correct
# terms and the coefficients are close to the true values.

from fastcan.narx import NARX, print_narx
from fastcan.narx import NARX, print_narx, tp2fd

# Convert poly_ids and time_shift_ids to feat_ids and delay_ids
feat_ids, delay_ids = tp2fd(time_shift_ids, selected_poly_ids)

narx_model = NARX(
time_shift_ids=time_shift_ids,
poly_ids=selected_poly_ids,
feat_ids=feat_ids,
delay_ids=delay_ids,
)

narx_model.fit(X, y)
@@ -145,7 +148,7 @@
auto_narx_model = make_narx(
X=X,
y=y,
n_features_to_select=4,
n_terms_to_select=4,
max_delay=3,
poly_degree=2,
verbose=0,
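For reference, a self-contained sketch of the renamed API as it appears in this diff (``n_terms_to_select`` replacing ``n_features_to_select``); the toy system below is made up, and the exact ``make_narx``/``fit`` behavior should be checked against the fastcan docs:

```python
import numpy as np
from fastcan.narx import make_narx, print_narx

rng = np.random.default_rng(0)
n_samples = 500
u = rng.normal(size=(n_samples, 1))

# A toy nonlinear ARX process to identify (coefficients made up).
y = np.zeros(n_samples)
for k in range(2, n_samples):
    y[k] = (0.5 * y[k - 1] - 0.3 * y[k - 2] ** 2
            + 0.7 * u[k - 1, 0] + 0.01 * rng.normal())

model = make_narx(
    X=u,
    y=y,
    n_terms_to_select=4,   # renamed from n_features_to_select in this PR
    max_delay=2,
    poly_degree=2,
    verbose=0,
).fit(u, y)
print_narx(model)
```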
17 changes: 10 additions & 7 deletions examples/plot_narx_msa.py
@@ -15,7 +15,7 @@
# Nonlinear system
# ----------------
#
# `Duffing equation <https://en.wikipedia.org/wiki/Duffing_equation>` is used to
# `Duffing equation <https://en.wikipedia.org/wiki/Duffing_equation>`_ is used to
# generate simulated data. The mathematical model is given by
#
# .. math::
@@ -82,15 +82,18 @@ def auto_duffing_equation(y, t):
dur = 10
n_samples = 1000

rng = np.random.default_rng(12345)
e_train = rng.normal(0, 0.0002, n_samples)
e_test = rng.normal(0, 0.0002, n_samples)
t = np.linspace(0, dur, n_samples)

sol = odeint(duffing_equation, [0.6, 0.8], t)
u_train = 2.5 * np.cos(2 * np.pi * t).reshape(-1, 1)
y_train = sol[:, 0]
y_train = sol[:, 0] + e_train

sol = odeint(auto_duffing_equation, [0.6, -0.8], t)
sol = odeint(duffing_equation, [0.6, -0.8], t)
u_test = 2.5 * np.cos(2 * np.pi * t).reshape(-1, 1)
y_test = sol[:, 0]
y_test = sol[:, 0] + e_test

# %%
# One-step-ahead VS. multi-step-ahead NARX
@@ -105,12 +108,12 @@ def auto_duffing_equation(y, t):

from fastcan.narx import make_narx

max_delay = 2
max_delay = 3

narx_model = make_narx(
X=u_train,
y=y_train,
n_features_to_select=10,
n_terms_to_select=5,
max_delay=max_delay,
poly_degree=3,
verbose=0,
@@ -159,7 +162,7 @@ def plot_prediction(ax, t, y_true, y_pred, title):
narx_model = make_narx(
X=u_all,
y=y_all,
n_features_to_select=10,
n_terms_to_select=5,
max_delay=max_delay,
poly_degree=3,
verbose=0,