|
| 1 | +""" |
| 2 | +Refine fastcan selection results |
| 3 | +""" |
| 4 | + |
| 5 | +from copy import deepcopy |
| 6 | +from numbers import Integral |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads |
| 10 | +from sklearn.utils._param_validation import Interval, StrOptions, validate_params |
| 11 | +from sklearn.utils.validation import check_is_fitted |
| 12 | + |
| 13 | +from ._cancorr_fast import _forward_search # type: ignore |
| 14 | +from ._fastcan import FastCan, _prepare_search |
| 15 | + |
| 16 | + |
@validate_params(
    {
        "selector": [FastCan],
        "drop": [
            Interval(Integral, 1, None, closed="left"),
            StrOptions({"all"}),
            "array-like",
        ],
        "max_iter": [
            None,
            Interval(Integral, 1, None, closed="left"),
        ],
        "verbose": ["verbose"],
    },
    prefer_skip_nested_validation=True,
)
def refine(selector, drop=1, max_iter=None, verbose=1):
    """Two-Stage Refining.

    In the refining process, some of the selected features are dropped, and
    the vacant positions are refilled from the candidate features.

    The process in which the vacant positions are refilled after searching all
    candidate features is called an `iteration`.

    An iteration whose refilled selection differs from the best selection
    found so far and increases the SSC (sum of the scores) of the selected
    features is called a `valid iteration`.

    Parameters
    ----------
    selector : FastCan
        Fitted FastCan selector.

    drop : int or array-like of shape (n_drops,) or "all", default=1
        The number of the selected features dropped for the subsequent
        reselection. Each value must satisfy
        ``1 <= drop < n_features_to_select - n_inclusions``. If an array-like
        is given, the refining is run for each value in turn. If "all", every
        value from 1 to ``n_features_to_select - n_inclusions - 1`` is used.

    max_iter : int, default=None
        The maximum number of iterations in the refining process. The search
        stops as soon as the total number of iterations (valid or not) reaches
        this value. If None, the number of iterations is not limited.

    verbose : int, default=1
        The verbosity level. Progress is printed when it is greater than 0.

    Returns
    -------
    indices : ndarray of shape (n_features_to_select,), dtype=int
        The indices of the selected features.

    scores : ndarray of shape (n_features_to_select,), dtype=float
        The h-correlation/eta-cosine of selected features.

    Raises
    ------
    ValueError
        If `drop` is empty, or if any value in `drop` is smaller than 1 or not
        smaller than ``n_features_to_select - n_inclusions``.

    References
    ----------
    * Zhang L., Li K., Bai E. W. and Irwin G. W. (2015).
        Two-stage orthogonal least squares methods for neural network construction.
        IEEE Transactions on Neural Networks and Learning Systems, 26(8), 1608-1621.

    Examples
    --------
    >>> from fastcan import FastCan, refine
    >>> X = [[1, 1, 0], [0.01, 0, 0], [-1, 0, 1], [0, 0, 0]]
    >>> y = [1, 0, -1, 0]
    >>> selector = FastCan(2, verbose=0).fit(X, y)
    >>> print(f"Indices: {selector.indices_}", f", SSC: {selector.scores_.sum():.5f}")
    Indices: [0 1] , SSC: 0.99998
    >>> indices, scores = refine(selector, drop=1, verbose=0)
    >>> print(f"Indices: {indices}", f", SSC: {scores.sum():.5f}")
    Indices: [1 2] , SSC: 1.00000
    """
    check_is_fitted(selector)
    # Work on a private copy so the fitted selector's data is never touched
    # (presumably `_forward_search` modifies X in place -- TODO confirm).
    X_transformed_ = deepcopy(selector.X_transformed_)
    n_features = selector.n_features_in_
    n_features_to_select = selector.n_features_to_select
    indices_include = selector.indices_include_
    indices_exclude = selector.indices_exclude_

    n_inclusions = indices_include.size
    n_selections = n_features_to_select - n_inclusions

    # Test the type first: comparing an ndarray with a string is elementwise
    # on recent NumPy, which makes a bare `drop == "all"` ambiguous in `if`.
    if isinstance(drop, str) and drop == "all":
        drop = np.arange(1, n_selections)
    else:
        drop = np.atleast_1d(drop).astype(int)

    # An empty `drop` would otherwise fail inside `drop.max()` with an
    # unrelated "zero-size array" message.
    if drop.size == 0:
        raise ValueError(
            "`drop` is empty, so there is nothing to refine. At least two "
            "features must be selected besides the included ones, "
            f"but got n_selections={n_selections}."
        )

    if (drop.max() >= n_selections) or (drop.min() < 1):
        raise ValueError(
            "`drop` should be between `1<=drop<n_features_to_select-n_inclusions`, "
            f"but got drop={drop} and n_selections={n_selections}."
        )

    if max_iter is None:
        max_iter = np.inf

    show_progress = verbose > 0
    # Loop-invariant, so resolved once instead of on every iteration.
    n_threads = _openmp_effective_n_threads()

    n_iters = 0
    n_valid_iters = 0
    best_scores = selector.scores_
    best_indices = selector.indices_
    best_ssc = selector.scores_.sum()
    indices_temp = best_indices
    for drop_n in drop:
        # `i` counts consecutive iterations without improvement; it is reset
        # to zero whenever a valid iteration is found.
        i = 0
        while i < n_features:
            # Rotate the freely selected features (the included ones stay in
            # front) so that a different tail is dropped on each iteration.
            rolled_indices = np.r_[
                indices_include, np.roll(indices_temp[n_inclusions:], -1)
            ]
            # Keep all but the last `drop_n` features as the preselected set.
            indices, scores, mask = _prepare_search(
                n_features,
                n_features_to_select,
                rolled_indices[:-drop_n],
                indices_exclude,
            )
            # The return value is not used: `indices` and `scores` are read
            # afterwards, so the search fills them in place.
            _forward_search(
                X=X_transformed_,
                V=selector.y_transformed_,
                t=n_features_to_select,
                tol=selector.tol,
                num_threads=n_threads,
                verbose=0,
                mask=mask,
                indices=indices,
                scores=scores,
            )

            ssc = scores.sum()
            if (ssc > best_ssc) and (set(indices) != set(best_indices)):
                i = 0
                n_valid_iters += 1
                best_scores = scores
                best_indices = indices
                best_ssc = ssc
            else:
                i += 1

            # The next iteration starts from the latest result, improved or not.
            indices_temp = indices
            n_iters += 1
            if show_progress:
                print(
                    f"No. of iterations: {n_iters}, "
                    f"No. of valid iterations {n_valid_iters}, "
                    f"SSC: {best_scores.sum():.5f}",
                    end="\r",
                )

            # NOTE(review): the limit applies to all iterations, not only the
            # valid ones -- confirm this is the intended meaning of `max_iter`.
            if n_iters >= max_iter:
                if show_progress:
                    print()
                return best_indices, best_scores

    if show_progress:
        # Finish the progress line that was written with end="\r".
        print()
    return best_indices, best_scores
0 commit comments