| 
 | 1 | +"""  | 
 | 2 | +Beam search.  | 
 | 3 | +"""  | 
 | 4 | + | 
 | 5 | +# Authors: The fastcan developers  | 
 | 6 | +# SPDX-License-Identifier: MIT  | 
 | 7 | + | 
 | 8 | +import numpy as np  | 
 | 9 | +from scipy.linalg import orth  | 
 | 10 | + | 
 | 11 | + | 
 | 12 | +def _beam_search(  | 
 | 13 | +    X, V, n_features_to_select, beam_width, indices_include, mask_exclude, tol, verbose  | 
 | 14 | +):  | 
 | 15 | +    """  | 
 | 16 | +    Perform beam search to find the best subset of features.  | 
 | 17 | +
  | 
 | 18 | +    Parameters:  | 
 | 19 | +    X : np.ndarray  | 
 | 20 | +        The transformed input feature matrix.  | 
 | 21 | +    V : np.ndarray  | 
 | 22 | +        The transformed target variable.  | 
 | 23 | +    n_features_to_select : int  | 
 | 24 | +        The total number of features to select.  | 
 | 25 | +    beam_width : int  | 
 | 26 | +        The number of top candidates to keep at each step.  | 
 | 27 | +    indices_include : list  | 
 | 28 | +        The indices of features that must be included in the selection.  | 
 | 29 | +    mask_exclude : np.ndarray, dtype=bool  | 
 | 30 | +        A boolean mask indicating which features to exclude.  | 
 | 31 | +    tol : float  | 
 | 32 | +        Tolerance for numerical stability in Gram-Schmidt process.  | 
 | 33 | +    verbose : bool  | 
 | 34 | +        If True, print progress information.  | 
 | 35 | +
  | 
 | 36 | +    Returns:  | 
 | 37 | +    indices : np.ndarray, dtype=np.int32  | 
 | 38 | +        The indices of the selected features.  | 
 | 39 | +    """  | 
 | 40 | + | 
 | 41 | +    n_features = X.shape[1]  | 
 | 42 | +    n_inclusions = len(indices_include)  | 
 | 43 | + | 
 | 44 | +    X, _ = _safe_normalize(X)  | 
 | 45 | + | 
 | 46 | +    for i in range(n_features_to_select - n_inclusions):  | 
 | 47 | +        if i == 0:  | 
 | 48 | +            X_support, X_selected = _prepare_candidates(  | 
 | 49 | +                X, mask_exclude, indices_include  | 
 | 50 | +            )  | 
 | 51 | +            beams_selected_ids = [indices_include for _ in range(beam_width)]  | 
 | 52 | +            W_selected = orth(X_selected)  | 
 | 53 | +            selected_score = np.sum((W_selected.T @ V) ** 2)  | 
 | 54 | +            beams_scores = _gram_schmidt(  | 
 | 55 | +                X, X_support, X_selected, selected_score, V, tol  | 
 | 56 | +            )  | 
 | 57 | +            beams_selected_ids, top_k_scores = _select_top_k(  | 
 | 58 | +                beams_scores[None, :],  | 
 | 59 | +                beams_selected_ids,  | 
 | 60 | +                beam_width,  | 
 | 61 | +            )  | 
 | 62 | +            continue  | 
 | 63 | +        beams_scores = np.zeros((beam_width, n_features))  | 
 | 64 | +        for beam_idx in range(beam_width):  | 
 | 65 | +            X_support, X_selected = _prepare_candidates(  | 
 | 66 | +                X, mask_exclude, beams_selected_ids[beam_idx]  | 
 | 67 | +            )  | 
 | 68 | +            beams_scores[beam_idx] = _gram_schmidt(  | 
 | 69 | +                X, X_support, X_selected, top_k_scores[beam_idx], V, tol  | 
 | 70 | +            )  | 
 | 71 | +        beams_selected_ids, top_k_scores = _select_top_k(  | 
 | 72 | +            beams_scores,  | 
 | 73 | +            beams_selected_ids,  | 
 | 74 | +            beam_width,  | 
 | 75 | +        )  | 
 | 76 | +        if verbose:  | 
 | 77 | +            print(  | 
 | 78 | +                f"Beam Search: {i + 1 + n_inclusions}/{n_features_to_select}, "  | 
 | 79 | +                f"Best Beam: {np.argmax(top_k_scores)}, "  | 
 | 80 | +                f"Beam SSC: {top_k_scores.max():.5f}",  | 
 | 81 | +                end="\r",  | 
 | 82 | +            )  | 
 | 83 | +    if verbose:  | 
 | 84 | +        print()  | 
 | 85 | +    best_beam = np.argmax(top_k_scores)  | 
 | 86 | +    indices = beams_selected_ids[best_beam]  | 
 | 87 | +    return np.array(indices, dtype=np.int32, order="F")  | 
 | 88 | + | 
 | 89 | + | 
 | 90 | +def _prepare_candidates(X, mask_exclude, indices_selected):  | 
 | 91 | +    X_support = np.copy(~mask_exclude)  | 
 | 92 | +    X_support[indices_selected] = False  | 
 | 93 | +    X_selected = X[:, indices_selected]  | 
 | 94 | +    return X_support, X_selected  | 
 | 95 | + | 
 | 96 | + | 
 | 97 | +def _select_top_k(  | 
 | 98 | +    beams_scores,  | 
 | 99 | +    ids_selected,  | 
 | 100 | +    beam_width,  | 
 | 101 | +):  | 
 | 102 | +    # For explore wider: make each feature in each selection iteration can  | 
 | 103 | +    # only be selected once.  | 
 | 104 | +    # For explore deeper: allow different beams to select the same feature  | 
 | 105 | +    # at the different selection iteration.  | 
 | 106 | +    n_features = beams_scores.shape[1]  | 
 | 107 | +    beams_max = np.argmax(beams_scores, axis=0)  | 
 | 108 | +    scores_max = beams_scores[beams_max, np.arange(n_features)]  | 
 | 109 | +    n_valid = np.sum(beams_scores.any(axis=0))  | 
 | 110 | +    n_selected = len(ids_selected[0])  | 
 | 111 | +    if n_valid < beam_width:  | 
 | 112 | +        raise ValueError(  | 
 | 113 | +            "Beam Search: Not enough valid candidates to select "  | 
 | 114 | +            f"beam width number of features, when selecting feature {n_selected + 1}. "  | 
 | 115 | +            "Please decrease beam_width or n_features_to_select."  | 
 | 116 | +        )  | 
 | 117 | + | 
 | 118 | +    top_k_ids = np.argpartition(scores_max, -beam_width)[-beam_width:]  | 
 | 119 | +    new_ids_selected = [[] for _ in range(beam_width)]  | 
 | 120 | +    for k, beam_k in enumerate(beams_max[top_k_ids]):  | 
 | 121 | +        new_ids_selected[k] = ids_selected[beam_k] + [top_k_ids[k]]  | 
 | 122 | +    top_k_scores = scores_max[top_k_ids]  | 
 | 123 | +    return new_ids_selected, top_k_scores  | 
 | 124 | + | 
 | 125 | + | 
 | 126 | +def _gram_schmidt(X, X_support, X_selected, selected_score, V, tol, modified=True):  | 
 | 127 | +    X = np.copy(X)  | 
 | 128 | +    if modified:  | 
 | 129 | +        # Change to Modified Gram-Schmidt  | 
 | 130 | +        W_selected = orth(X_selected)  | 
 | 131 | +    scores = np.zeros(X.shape[1])  | 
 | 132 | +    for i, support in enumerate(X_support):  | 
 | 133 | +        if not support:  | 
 | 134 | +            continue  | 
 | 135 | +        xi = X[:, i : i + 1]  | 
 | 136 | +        for j in range(W_selected.shape[1]):  | 
 | 137 | +            proj = W_selected[:, j : j + 1].T @ xi  | 
 | 138 | +            xi -= proj * W_selected[:, j : j + 1]  | 
 | 139 | +        wi, X_support[i] = _safe_normalize(xi)  | 
 | 140 | +        if not X_support[i]:  | 
 | 141 | +            continue  | 
 | 142 | +        if np.any(np.abs(wi.T @ W_selected) > tol):  | 
 | 143 | +            X_support[i] = False  | 
 | 144 | +            continue  | 
 | 145 | +        scores[i] = np.sum((wi.T @ V) ** 2)  | 
 | 146 | +    scores += selected_score  | 
 | 147 | +    scores[~X_support] = 0  | 
 | 148 | +    return scores  | 
 | 149 | + | 
 | 150 | + | 
 | 151 | +def _safe_normalize(X):  | 
 | 152 | +    norm = np.linalg.norm(X, axis=0)  | 
 | 153 | +    non_zero_support = norm != 0  | 
 | 154 | +    norm[~non_zero_support] = 1  | 
 | 155 | +    return X / norm, non_zero_support  | 
0 commit comments