|
"""
Beam search.
"""

# Authors: The fastcan developers
# SPDX-License-Identifier: MIT

import numpy as np
from scipy.linalg import orth
| 11 | + |
def _beam_search(
    X, V, n_features_to_select, beam_width, indices_include, mask_exclude, tol, verbose
):
    """
    Perform beam search to find the best subset of features.

    Parameters:
    X : np.ndarray
        The transformed input feature matrix.
    V : np.ndarray
        The transformed target variable.
    n_features_to_select : int
        The total number of features to select.
    beam_width : int
        The number of top candidates to keep at each step.
    indices_include : list
        The indices of features that must be included in the selection.
    mask_exclude : np.ndarray, dtype=bool
        A boolean mask indicating which features to exclude.
    tol : float
        Tolerance for numerical stability in Gram-Schmidt process.
    verbose : bool
        If True, print progress information.

    Returns:
    indices : np.ndarray, dtype=np.int32
        The indices of the selected features.

    Raises:
    ValueError
        Propagated from _select_top_k when fewer than beam_width valid
        candidates remain.
    """

    n_features = X.shape[1]
    n_inclusions = len(indices_include)

    X, _ = _safe_normalize(X)

    # Degenerate case: every feature to select is already prescribed, so
    # there is nothing to search.  Without this guard the loop below never
    # runs and `top_k_scores` is unbound (NameError at the final argmax).
    if n_features_to_select <= n_inclusions:
        return np.array(indices_include, dtype=np.int32, order="F")

    # First selection step: all beams start from the mandatory inclusions,
    # so a single scoring pass suffices; _select_top_k then spawns
    # beam_width distinct beams from the top-scoring candidates.
    X_support, X_selected = _prepare_candidates(X, mask_exclude, indices_include)
    # Sharing the same `indices_include` list across beams is safe because
    # _select_top_k only concatenates (never mutates) these lists.
    beams_selected_ids = [indices_include for _ in range(beam_width)]
    W_selected = orth(X_selected)
    # SSC already captured by the prescribed features alone.
    selected_score = np.sum((W_selected.T @ V) ** 2)
    beams_scores = _gram_schmidt(X, X_support, X_selected, selected_score, V, tol)
    beams_selected_ids, top_k_scores = _select_top_k(
        beams_scores[None, :],
        beams_selected_ids,
        beam_width,
    )

    # Remaining steps: extend each beam independently, then re-select the
    # best beam_width extensions across all beams.
    for i in range(1, n_features_to_select - n_inclusions):
        beams_scores = np.zeros((beam_width, n_features))
        for beam_idx in range(beam_width):
            X_support, X_selected = _prepare_candidates(
                X, mask_exclude, beams_selected_ids[beam_idx]
            )
            beams_scores[beam_idx] = _gram_schmidt(
                X, X_support, X_selected, top_k_scores[beam_idx], V, tol
            )
        beams_selected_ids, top_k_scores = _select_top_k(
            beams_scores,
            beams_selected_ids,
            beam_width,
        )
        if verbose:
            print(
                f"Beam Search: {i + 1 + n_inclusions}/{n_features_to_select}, "
                f"Best Beam: {np.argmax(top_k_scores)}, "
                f"Beam SSC: {top_k_scores.max():.5f}",
                end="\r",
            )
    if verbose:
        print()
    best_beam = np.argmax(top_k_scores)
    return np.array(beams_selected_ids[best_beam], dtype=np.int32, order="F")
| 88 | + |
| 89 | + |
| 90 | +def _prepare_candidates(X, mask_exclude, indices_selected): |
| 91 | + X_support = np.copy(~mask_exclude) |
| 92 | + X_support[indices_selected] = False |
| 93 | + X_selected = X[:, indices_selected] |
| 94 | + return X_support, X_selected |
| 95 | + |
| 96 | + |
| 97 | +def _select_top_k( |
| 98 | + beams_scores, |
| 99 | + ids_selected, |
| 100 | + beam_width, |
| 101 | +): |
| 102 | + # For explore wider: make each feature in each selection iteration can |
| 103 | + # only be selected once. |
| 104 | + # For explore deeper: allow different beams to select the same feature |
| 105 | + # at the different selection iteration. |
| 106 | + n_features = beams_scores.shape[1] |
| 107 | + beams_max = np.argmax(beams_scores, axis=0) |
| 108 | + scores_max = beams_scores[beams_max, np.arange(n_features)] |
| 109 | + n_valid = np.sum(beams_scores.any(axis=0)) |
| 110 | + n_selected = len(ids_selected[0]) |
| 111 | + if n_valid < beam_width: |
| 112 | + raise ValueError( |
| 113 | + "Beam Search: Not enough valid candidates to select " |
| 114 | + f"beam width number of features, when selecting feature {n_selected + 1}. " |
| 115 | + "Please decrease beam_width or n_features_to_select." |
| 116 | + ) |
| 117 | + |
| 118 | + top_k_ids = np.argpartition(scores_max, -beam_width)[-beam_width:] |
| 119 | + new_ids_selected = [[] for _ in range(beam_width)] |
| 120 | + for k, beam_k in enumerate(beams_max[top_k_ids]): |
| 121 | + new_ids_selected[k] = ids_selected[beam_k] + [top_k_ids[k]] |
| 122 | + top_k_scores = scores_max[top_k_ids] |
| 123 | + return new_ids_selected, top_k_scores |
| 124 | + |
| 125 | + |
def _gram_schmidt(X, X_support, X_selected, selected_score, V, tol, modified=True):
    """Score every candidate after orthogonalizing it against the selection.

    Parameters:
    X : np.ndarray
        Column-normalized feature matrix; copied internally, so the
        caller's array is untouched.
    X_support : np.ndarray, dtype=bool
        Candidate mask; updated IN PLACE to drop numerically degenerate
        candidates (zero residual, or residual still correlated with the
        selected subspace beyond `tol`).
    X_selected : np.ndarray
        Columns already selected for this beam (may have zero columns).
    selected_score : float
        SSC already accumulated by the selected subspace.
    V : np.ndarray
        The transformed target variable.
    tol : float
        Orthogonality tolerance for rejecting unstable candidates.
    modified : bool, default=True
        If True, use Modified Gram-Schmidt (remove projections one basis
        vector at a time — numerically more stable); otherwise use
        classical Gram-Schmidt (one batched projection removal).

    Returns:
    scores : np.ndarray
        Cumulative score per feature; zero for invalid candidates.
    """
    X = np.copy(X)
    # Orthonormal basis of the selected subspace.  BUG FIX: this was
    # previously computed only when `modified` was True, so the
    # `modified=False` path raised NameError.
    W_selected = orth(X_selected)
    scores = np.zeros(X.shape[1])
    for i, support in enumerate(X_support):
        if not support:
            continue
        xi = X[:, i : i + 1]
        if modified:
            # Modified Gram-Schmidt: subtract each projection sequentially
            # from the already-updated residual.
            for j in range(W_selected.shape[1]):
                proj = W_selected[:, j : j + 1].T @ xi
                xi -= proj * W_selected[:, j : j + 1]
        else:
            # Classical Gram-Schmidt: subtract all projections at once.
            xi -= W_selected @ (W_selected.T @ xi)
        wi, X_support[i] = _safe_normalize(xi)
        if not X_support[i]:
            # Residual vanished: the feature lies in the selected span.
            continue
        if np.any(np.abs(wi.T @ W_selected) > tol):
            # Residual is not orthogonal enough: numerically unreliable.
            X_support[i] = False
            continue
        scores[i] = np.sum((wi.T @ V) ** 2)
    scores += selected_score
    scores[~X_support] = 0
    return scores
| 149 | + |
| 150 | + |
| 151 | +def _safe_normalize(X): |
| 152 | + norm = np.linalg.norm(X, axis=0) |
| 153 | + non_zero_support = norm != 0 |
| 154 | + norm[~non_zero_support] = 1 |
| 155 | + return X / norm, non_zero_support |