ENH speed up beam search (#198)

MatthewSZhang · web-flow · commit 5c2dbe347043 · 2025-11-08T20:30:10.000+08:00
diff --git a/fastcan/_beam.py b/fastcan/_beam.py
@@ -45,17 +45,17 @@ def _beam_search(
 
     for i in range(n_features_to_select - n_inclusions):
         if i == 0:
-            X_support, X_selected = _prepare_candidates(
+            mask, X_selected = _prepare_candidates(
                 X, mask_exclude, indices_include
             )
             if X_selected.shape[1] == 0:
                 beams_scores = np.sum((X.T @ V) ** 2, axis=1)
-                beams_scores[~X_support] = 0
+                beams_scores[mask] = 0
             else:
                 W_selected = orth(X_selected)
                 selected_score = np.sum((W_selected.T @ V) ** 2)
-                beams_scores = _gram_schmidt(
-                    X, X_support, X_selected, selected_score, V, tol
+                beams_scores = _mgs_ssc(
+                    X, V, W_selected, mask, selected_score, tol
                 )
             beams_selected_ids = [indices_include for _ in range(beam_width)]
             beams_selected_ids, top_k_scores = _select_top_k(
@@ -66,11 +66,12 @@ def _beam_search(
             continue
         beams_scores = np.zeros((beam_width, n_features))
         for beam_idx in range(beam_width):
-            X_support, X_selected = _prepare_candidates(
+            mask, X_selected = _prepare_candidates(
                 X, mask_exclude, beams_selected_ids[beam_idx]
             )
-            beams_scores[beam_idx] = _gram_schmidt(
-                X, X_support, X_selected, top_k_scores[beam_idx], V, tol
+            W_selected = orth(X_selected)
+            beams_scores[beam_idx] = _mgs_ssc(
+                X, V, W_selected, mask, top_k_scores[beam_idx], tol
             )
         beams_selected_ids, top_k_scores = _select_top_k(
             beams_scores,
@@ -92,10 +93,10 @@ def _beam_search(
 
 
 def _prepare_candidates(X, mask_exclude, indices_selected):
-    X_support = np.copy(~mask_exclude)
-    X_support[indices_selected] = False
+    mask = np.copy(mask_exclude)
+    mask[indices_selected] = True
     X_selected = X[:, indices_selected]
-    return X_support, X_selected
+    return mask, X_selected
 
 
 def _select_top_k(
@@ -127,31 +128,22 @@ def _select_top_k(
     return new_ids_selected, top_k_scores
 
 
-def _gram_schmidt(X, X_support, X_selected, selected_score, V, tol):
+def _mgs_ssc(X, V, W_selected, mask, selected_score, tol):
     X = np.copy(X)
-    W_selected = orth(X_selected) # Change to Modified Gram-Schmidt
-    scores = np.zeros(X.shape[1])
-    for i, support in enumerate(X_support):
-        if not support:
-            continue
-        xi = X[:, i : i + 1]
-        for j in range(W_selected.shape[1]):
-            proj = W_selected[:, j : j + 1].T @ xi
-            xi -= proj * W_selected[:, j : j + 1]
-        wi, X_support[i] = _safe_normalize(xi)
-        if not X_support[i]:
-            continue
-        if np.any(np.abs(wi.T @ W_selected) > tol):
-            X_support[i] = False
-            continue
-        scores[i] = np.sum((wi.T @ V) ** 2)
+    proj = W_selected.T @ X
+    X -= W_selected @ proj
+    W, non_zero_mask = _safe_normalize(X)
+    mask |= non_zero_mask
+    linear_independent_mask = np.any(np.abs(W.T @ W_selected) > tol, axis=1)
+    mask |= linear_independent_mask
+    scores = np.sum((W.T @ V) ** 2, axis=1)
     scores += selected_score
-    scores[~X_support] = 0
+    scores[mask] = 0
     return scores
 
 
 def _safe_normalize(X):
     norm = np.linalg.norm(X, axis=0)
-    non_zero_support = norm != 0
-    norm[~non_zero_support] = 1
-    return X / norm, non_zero_support
+    non_zero_mask = norm == 0
+    norm[non_zero_mask] = 1
+    return X / norm, non_zero_mask
diff --git a/fastcan/_cancorr_fast.pyx b/fastcan/_cancorr_fast.pyx
@@ -47,7 +47,7 @@ cdef int _iamax(
 
 
 @final
-cdef bint _normv(
+cdef uint8_t _normv(
     const floating* x,          # IN/OUT
     int n_samples,              # IN
 ) noexcept nogil:
diff --git a/pixi.toml b/pixi.toml
@@ -89,7 +89,7 @@ time-eta = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import Fa
 profile-minibatch = { cmd = '''python -c "import cProfile; import numpy as np; from fastcan import minibatch; X = np.random.rand(100, 3000); y = np.random.rand(100, 20); cProfile.run('minibatch(X, y, 1000, 10, verbose=0)', sort='{{ SORT }}')"''', args = [{ arg = "SORT", default = "cumtime" }] }
 time-narx = '''python -m timeit -n 1 -s "import numpy as np; from fastcan.narx import make_narx; rng = np.random.default_rng(5); X = rng.random((1000, 10)); y = rng.random((1000, 2)); m = make_narx(X, y, 10, max_delay=2, poly_degree=2, verbose=0)" "m.fit(X, y, coef_init='one_step_ahead', verbose=1)"'''
 profile-narx = { cmd = '''python -c "import cProfile; import numpy as np; from fastcan.narx import make_narx; rng = np.random.default_rng(8); X = rng.random((3000, 3)); y = rng.random((3000, 3)); m = make_narx(X, y, 10, max_delay=10, poly_degree=2, verbose=0); cProfile.run('m.fit(X, y, coef_init=[0]*33)', sort='{{ SORT }}')"''', args = [{ arg = "SORT", default = "tottime" }] }
-time-beam = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(3000, 100); y = np.random.rand(3000, 20)' 's = FastCan(20, beam_width=3, verbose=0).fit(X, y)'"
+time-beam = "python -m timeit -n 5 -s 'import numpy as np; from fastcan import FastCan; X = np.random.rand(3000, 100); y = np.random.rand(3000, 20)' 's = FastCan(20, beam_width=10, verbose=0).fit(X, y)'"
 
 [feature.asv.tasks]
 asv-build = { cmd = "python -m asv machine --machine {{ MACHINE }} --yes && python -m asv run --show-stderr -v --machine {{ MACHINE }} {{ EXTRA_ARGS }}", cwd = "asv_benchmarks", args = [{ arg = "MACHINE", default = "MacOS-M1" }, { arg = "EXTRA_ARGS", default = "" }] }