Add a batch_size argument to the predict methods so large inputs can be split into smaller mini-batches if required

ilblackdragon · ilblackdragon · commit c977e835bd92 · 2016-02-08T23:10:02.000-08:00
diff --git a/skflow/estimators/base.py b/skflow/estimators/base.py
@@ -239,11 +239,12 @@ def partial_fit(self, X, y):
         """
         return self.fit(X, y)
 
-    def _predict(self, X):
+    def _predict(self, X, batch_size=-1):
         if not self._initialized:
             raise NotFittedError()
         self._graph.add_to_collection("IS_TRAINING", False)
-        predict_data_feeder = setup_predict_data_feeder(X)
+        predict_data_feeder = setup_predict_data_feeder(
+            X, batch_size=batch_size)
         preds = []
         dropouts = self._graph.get_collection(DROPOUTS)
         feed_dict = {prob: 1.0 for prob in dropouts}
@@ -254,7 +255,7 @@ def _predict(self, X):
                 feed_dict))
         return np.concatenate(preds, axis=0)
 
-    def predict(self, X, axis=1):
+    def predict(self, X, axis=1, batch_size=-1):
         """Predict class or regression for X.
 
         For a classification model, the predicted class for each sample in X is
@@ -263,27 +264,35 @@ def predict(self, X, axis=1):
 
         Args:
             X: array-like matrix, [n_samples, n_features...] or iterator.
+            axis: Axis along which to take the argmax for classification.
+                  By default axis 1 (the axis right after batch) is used.
+                  Use 2 for sequence predictions.
+            batch_size: If the test set is too big, split it into mini-batches
+                        of this size. By default (-1) the full dataset is used.
 
         Returns:
             y: array of shape [n_samples]. The predicted classes or predicted
             value.
         """
-        pred = self._predict(X)
+        pred = self._predict(X, batch_size=batch_size)
         if self.n_classes < 2:
             return pred
         return pred.argmax(axis=axis)
 
-    def predict_proba(self, X):
+    def predict_proba(self, X, batch_size=-1):
         """Predict class probability of the input samples X.
 
         Args:
             X: array-like matrix, [n_samples, n_features...] or iterator.
+            batch_size: If the test set is too big, split it into mini-batches
+                        of this size. By default (-1) the full dataset is used.
 
         Returns:
             y: array of shape [n_samples, n_classes]. The predicted
             probabilities for each class.
-        """
-        return self._predict(X)
+
+        """
+        return self._predict(X, batch_size=batch_size)
 
     def get_tensor(self, name):
         """Returns tensor by name.