Merge pull request #12 from MatthewSZhang/docs

MatthewSZhang · web-flow · commit 114b0dced252 · 2024-10-15T10:31:40.000+08:00
DOC fix wrong pick in plot_redundancy
diff --git a/README.rst b/README.rst
@@ -39,7 +39,7 @@ FastCan is a feature selection method, which has following advantages:
 
 #. Skip redundant features.
 
-#. Evalaute relative usefulness of features.
+#. Evaluate relative usefulness of features.
 
 Check `Home Page <https://fastcan.readthedocs.io/en/latest/?badge=latest>`_ for more information.
 
diff --git a/examples/plot_redundancy.py b/examples/plot_redundancy.py
@@ -1,7 +1,7 @@
 """
-===================================================
-Feature selection performance on redundant features
-===================================================
+=================================
+Performance on redundant features
+=================================
 
 .. currentmodule:: fastcan
 
@@ -109,8 +109,7 @@ def get_n_missed(
     n_missed_dep = len(
         np.setdiff1d(dep_info_ids+redundant_ids, selected_ids)
     )-n_redundant
-    if n_missed_dep < 0:
-        n_missed_dep = 0
+    n_missed_dep = max(n_missed_dep, 0)
     return n_missed_indep+n_missed_dep
 
 # %%
@@ -160,7 +159,7 @@ def get_n_missed(
 N_REPEATED = 10
 
 selector_dict = {
-    "fastcan": FastCan(N_SELECTED, verbose=0),
+    "fastcan": FastCan(N_SELECTED, tol=1e-7, verbose=0),
     "skb_reg": SelectKBest(f_regression, k=N_SELECTED),
     "skb_mir": SelectKBest(mutual_info_regression, k=N_SELECTED),
     "sfm_lsvr": SelectFromModel(lsvr, max_features=N_SELECTED, threshold=-np.inf),
@@ -179,7 +178,7 @@ def get_n_missed(
 n_missed = np.zeros((N_REPEATED, N_SELECTORS), dtype=int)
 
 for i in range(N_REPEATED):
-    X, y = make_redundant(
+    data, target = make_redundant(
         n_samples=N_SAMPLES,
         n_features=N_FEATURES,
         dep_info_ids=DEP_INFO_IDS,
@@ -188,7 +187,7 @@ def get_n_missed(
         random_seed=i,
     )
     for j, selector in enumerate(selector_dict.values()):
-        result_ids = selector.fit(X, y).get_support(indices=True)
+        result_ids = selector.fit(data, target).get_support(indices=True)
         n_missed[i, j] = get_n_missed(
             dep_info_ids=DEP_INFO_IDS,
             indep_info_ids=INDEP_INFO_IDS,
diff --git a/fastcan/_utils.py b/fastcan/_utils.py
@@ -80,6 +80,17 @@ def ols(X, y, t=1):
     scores : ndarray of shape (n_features_to_select,), dtype=float
         The scores of selected features. The order of
         the scores is corresponding to the feature selection process.
+
+    Examples
+    --------
+    >>> from fastcan import ols
+    >>> X = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]]
+    >>> y = [1, 0, 1, 0]
+    >>> indices, scores = ols(X, y, 2)
+    >>> indices
+    array([0, 2])
+    >>> scores
+    array([0.5, 0.5])
     """
     X, y = check_X_y(X, y, dtype=float, ensure_2d=True)
     n_features = X.shape[1]
diff --git a/pixi.lock b/pixi.lock