Update README and BorutaPy docstring with new runnable example

danielhomola · danielhomola · commit 633017155203 · 2017-03-05T12:01:14.000Z
diff --git a/README.md b/README.md
@@ -156,32 +156,32 @@ __verbose__ : int, default=0
     import pandas as pd
     from sklearn.ensemble import RandomForestClassifier
     from boruta import BorutaPy
-
+    
     # load X and y
     # NOTE BorutaPy accepts numpy arrays only, hence the .values attribute
-    X = pd.read_csv('my_X_table.csv', index_col=0).values
-    y = pd.read_csv('my_y_vector.csv', index_col=0).values
-
+    X = pd.read_csv('examples/test_X.csv', index_col=0).values
+    y = pd.read_csv('examples/test_y.csv', header=None, index_col=0).values
+    y = y.ravel()
+    
     # define random forest classifier, with utilising all cores and
     # sampling in proportion to y labels
     rf = RandomForestClassifier(n_jobs=-1, class_weight='auto', max_depth=5)
-
+    
     # define Boruta feature selection method
-    feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)
-
-    # find all relevant features
+    feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1)
+    
+    # find all relevant features - 5 features should be selected
     feat_selector.fit(X, y)
-
-    # check selected features
+    
+    # check selected features - first 5 features are selected
     feat_selector.support_
-
+    
     # check ranking of features
     feat_selector.ranking_
-
+    
     # call transform() on X to filter it down to selected features
     X_filtered = feat_selector.transform(X)
 
-
 ## References ##
 
 1. Kursa M., Rudnicki W., "Feature Selection with the Boruta Package" Journal of Statistical Software, Vol. 36, Issue 11, Sep 2010
diff --git a/boruta/boruta_py.py b/boruta/boruta_py.py
@@ -137,7 +137,11 @@ class BorutaPy(BaseEstimator, TransformerMixin):
 
     Examples
     --------
-        
+    
+    import pandas as pd
+    from sklearn.ensemble import RandomForestClassifier
+    from boruta import BorutaPy
+    
     # load X and y
     # NOTE BorutaPy accepts numpy arrays only, hence the .values attribute
     X = pd.read_csv('examples/test_X.csv', index_col=0).values