Skip to content

Commit 1322eff

Browse files
author
cmdupuis3
committed
Wrap batch dim generation in a flag
1 parent 0679395 commit 1322eff

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

xbatcher/generators.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,13 @@ def _drop_input_dims(ds, input_dims, suffix='_input'):
5353
out.coords[dim] = newdim, ds[dim].data, ds[dim].attrs
5454
return out
5555

56-
def _maybe_stack_batch_dims(ds, input_dims, stacked_dim_name='sample'):
56+
def _maybe_stack_batch_dims(ds, input_dims, squeeze_batch_dim, stacked_dim_name='sample'):
5757
batch_dims = [d for d in ds.dims if d not in input_dims]
5858
if len(batch_dims) < 2:
59-
return ds.expand_dims(stacked_dim_name, 0)
59+
if(squeeze_batch_dim):
60+
return ds
61+
else:
62+
return ds.expand_dims(stacked_dim_name, 0)
6063
ds_stack = ds.stack(**{stacked_dim_name: batch_dims})
6164
# ensure correct order
6265
dim_order = (stacked_dim_name,) + tuple(input_dims)
@@ -89,6 +92,10 @@ class BatchGenerator:
8992
preload_batch : bool, optional
9093
If ``True``, each batch will be loaded into memory before reshaping /
9194
processing, triggering any dask arrays to be computed.
95+
squeeze_batch_dim : bool, optional
96+
If ``False``, each batch's dataset will have a "sample" dimension of size 1
97+
prepended to the array. This functionality is useful for interoperability
98+
with Keras / Tensorflow.
9299
93100
Yields
94101
------
@@ -104,6 +111,7 @@ def __init__(
104111
batch_dims={},
105112
concat_input_dims=False,
106113
preload_batch=True,
114+
squeeze_batch_dim=True
107115
):
108116

109117
self.ds = _as_xarray_dataset(ds)
@@ -113,6 +121,7 @@ def __init__(
113121
self.batch_dims = OrderedDict(batch_dims)
114122
self.concat_input_dims = concat_input_dims
115123
self.preload_batch = preload_batch
124+
self.squeeze_batch_dim = squeeze_batch_dim
116125

117126
def __iter__(self):
118127
for ds_batch in self._iterate_batch_dims(self.ds):
@@ -131,11 +140,11 @@ def __iter__(self):
131140
new_input_dims = [
132141
dim + new_dim_suffix for dim in self.input_dims
133142
]
134-
yield _maybe_stack_batch_dims(dsc, new_input_dims)
143+
yield _maybe_stack_batch_dims(dsc, new_input_dims, self.squeeze_batch_dim)
135144
else:
136145
for ds_input in input_generator:
137146
yield _maybe_stack_batch_dims(
138-
ds_input, list(self.input_dims)
147+
ds_input, list(self.input_dims), self.squeeze_batch_dim
139148
)
140149

141150
def _iterate_batch_dims(self, ds):

0 commit comments

Comments
 (0)