Skip to content

Commit 17152c1

Browse files
authored
Merge pull request #111 from DasCapschen/pass_only_shape
pass only shape for X_train
2 parents 022f6d4 + d91f0bb commit 17152c1

File tree

5 files changed

+20
-21
lines changed

5 files changed

+20
-21
lines changed

examples/plot_mpg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
plt.show()
5252

5353
# Calculate the variance
54-
mpg_V_IJ_unbiased = fci.random_forest_error(mpg_forest, mpg_X_train,
54+
mpg_V_IJ_unbiased = fci.random_forest_error(mpg_forest, mpg_X_train.shape,
5555
mpg_X_test)
5656

5757
# Plot error bars for predicted MPG using unbiased variance

examples/plot_mpg_svr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
plt.show()
5353

5454
# Calculate the variance
55-
mpg_V_IJ_unbiased = fci.random_forest_error(mpg_bagger, mpg_X_train, mpg_X_test)
55+
mpg_V_IJ_unbiased = fci.random_forest_error(mpg_bagger, mpg_X_train.shape, mpg_X_test)
5656

5757
# Plot error bars for predicted MPG using unbiased variance
5858
plt.errorbar(mpg_y_test, mpg_y_hat, yerr=np.sqrt(mpg_V_IJ_unbiased), fmt="o")

examples/plot_spam.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
plt.legend()
4545

4646
# Calculate the variance
47-
spam_V_IJ_unbiased = fci.random_forest_error(spam_RFC, spam_X_train,
47+
spam_V_IJ_unbiased = fci.random_forest_error(spam_RFC, spam_X_train.shape,
4848
spam_X_test)
4949

5050
# Plot forest prediction for emails and standard deviation for estimates

forestci/forestci.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def calc_inbag(n_samples, forest):
9090

9191

9292
def _core_computation(
93-
X_train,
93+
X_train_shape,
9494
X_test,
9595
inbag,
9696
pred_centered,
@@ -104,8 +104,8 @@ def _core_computation(
104104
105105
Parameters
106106
----------
107-
X_train : ndarray
108-
An array with shape (n_train_sample, n_features).
107+
X_train_shape : tuple (int, int)
108+
Shape (n_train_sample, n_features).
109109
110110
X_test : ndarray
111111
An array with shape (n_test_sample, n_features).
@@ -140,10 +140,10 @@ def _core_computation(
140140
raise ValueError("If memory_constrained=True, must provide", "memory_limit.")
141141

142142
# Assumes double precision float
143-
chunk_size = int((memory_limit * 1e6) / (8.0 * X_train.shape[0]))
143+
chunk_size = int((memory_limit * 1e6) / (8.0 * X_train_shape[0]))
144144

145145
if chunk_size == 0:
146-
min_limit = 8.0 * X_train.shape[0] / 1e6
146+
min_limit = 8.0 * X_train_shape[0] / 1e6
147147
raise ValueError(
148148
"memory_limit provided is too small."
149149
+ "For these dimensions, memory_limit must "
@@ -238,7 +238,7 @@ def _centered_prediction_forest(forest, X_test):
238238

239239
def random_forest_error(
240240
forest,
241-
X_train,
241+
X_train_shape,
242242
X_test,
243243
inbag=None,
244244
calibrate=True,
@@ -256,9 +256,8 @@ def random_forest_error(
256256
forest : RandomForest
257257
Regressor or Classifier object.
258258
259-
X_train : ndarray
260-
An array with shape (n_train_sample, n_features). The design matrix for
261-
training data.
259+
X_train_shape : tuple (int, int)
260+
Shape (n_train_sample, n_features) of the design matrix for training data.
262261
263262
X_test : ndarray
264263
An array with shape (n_test_sample, n_features). The design matrix
@@ -307,12 +306,12 @@ def random_forest_error(
307306
of Machine Learning Research vol. 15, pp. 1625-1651, 2014.
308307
"""
309308
if inbag is None:
310-
inbag = calc_inbag(X_train.shape[0], forest)
309+
inbag = calc_inbag(X_train_shape[0], forest)
311310

312311
pred_centered = _centered_prediction_forest(forest, X_test)
313312
n_trees = forest.n_estimators
314313
V_IJ = _core_computation(
315-
X_train, X_test, inbag, pred_centered, n_trees, memory_constrained, memory_limit
314+
X_train_shape, X_test, inbag, pred_centered, n_trees, memory_constrained, memory_limit
316315
)
317316
V_IJ_unbiased = _bias_correction(V_IJ, inbag, pred_centered, n_trees)
318317

@@ -344,7 +343,7 @@ def random_forest_error(
344343

345344
results_ss = random_forest_error(
346345
new_forest,
347-
X_train,
346+
X_train_shape,
348347
X_test,
349348
calibrate=False,
350349
memory_constrained=memory_constrained,

forestci/tests/test_forestci.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def test_random_forest_error():
2626
for ib in [inbag, None]:
2727
for calibrate in [True, False]:
2828
V_IJ_unbiased = fci.random_forest_error(
29-
forest, X_train, X_test, inbag=ib, calibrate=calibrate
29+
forest, X_train.shape, X_test, inbag=ib, calibrate=calibrate
3030
)
3131
npt.assert_equal(V_IJ_unbiased.shape[0], y_test.shape[0])
3232

@@ -60,7 +60,7 @@ def test_bagging_svr_error():
6060
for ib in [inbag, None]:
6161
for calibrate in [True, False]:
6262
V_IJ_unbiased = fci.random_forest_error(
63-
bagger, X_train, X_test, inbag=ib, calibrate=calibrate
63+
bagger, X_train.shape, X_test, inbag=ib, calibrate=calibrate
6464
)
6565
npt.assert_equal(V_IJ_unbiased.shape[0], y_test.shape[0])
6666

@@ -78,7 +78,7 @@ def test_core_computation():
7878
n_trees = 4
7979

8080
our_vij = fci._core_computation(
81-
X_train_ex, X_test_ex, inbag_ex, pred_centered_ex, n_trees
81+
X_train_ex.shape, X_test_ex, inbag_ex, pred_centered_ex, n_trees
8282
)
8383

8484
r_vij = np.concatenate([np.array([112.5, 387.5]) for _ in range(1000)])
@@ -87,7 +87,7 @@ def test_core_computation():
8787

8888
for mc, ml in zip([True, False], [0.01, None]):
8989
our_vij = fci._core_computation(
90-
X_train_ex,
90+
X_train_ex.shape,
9191
X_test_ex,
9292
inbag_ex,
9393
pred_centered_ex,
@@ -113,7 +113,7 @@ def test_bias_correction():
113113
n_trees = 4
114114

115115
our_vij = fci._core_computation(
116-
X_train_ex, X_test_ex, inbag_ex, pred_centered_ex, n_trees
116+
X_train_ex.shape, X_test_ex, inbag_ex, pred_centered_ex, n_trees
117117
)
118118
our_vij_unbiased = fci._bias_correction(
119119
our_vij, inbag_ex, pred_centered_ex, n_trees
@@ -139,7 +139,7 @@ def test_with_calibration():
139139
n_trees = 4
140140
forest = RandomForestRegressor(n_estimators=n_trees)
141141
forest.fit(X_train, y_train)
142-
V_IJ_unbiased = fci.random_forest_error(forest, X_train, X_test)
142+
V_IJ_unbiased = fci.random_forest_error(forest, X_train.shape, X_test)
143143
npt.assert_equal(V_IJ_unbiased.shape[0], y_test.shape[0])
144144

145145

0 commit comments

Comments (0)