New version for reducing memory consumption

FrancescAlted · FrancescAlted · commit cb6c7514c604 · 2025-04-18T09:26:13.000+02:00
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
@@ -2852,6 +2852,7 @@ def compute(self, item=None, **kwargs):
 
     def __getitem__(self, item):
         if self.chunked_eval:
+            # TODO: this allocates an output array of the full shape, which can consume a lot of memory
             output = np.empty(self.shape, self.dtype)
             # It is important to pass kwargs here, because chunks can be used internally
             chunked_eval(self.func, self.inputs_dict, item, _getitem=True, _output=output, **self.kwargs)
diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py
@@ -11,6 +11,7 @@
 import builtins
 import inspect
 import math
+import tempfile
 from collections import OrderedDict, namedtuple
 from functools import reduce
 from itertools import product
@@ -3246,14 +3247,7 @@ def arange_fill(inputs, output, offset):
         # C order is guaranteed, and no reshape is needed
         return lazyarr.compute(**kwargs)
 
-    # In principle, when c_order is False, this would be enough:
-    # return reshape(lazyarr, shape, c_order=c_order, **kwargs)
-    # so that an intermediate NDArray wouldn't be needed, which is more memory efficient.
-    # However, benchmarks show that performance is better with the approach below.
-    # Incidentally, not requiring C order can be quite illustrative for the user to
-    # understand how the process of computing lazy arrays (and chunking) works.
-    larr = lazyarr.compute()  # intermediate array
-    return reshape(larr, shape, c_order=c_order, **kwargs)
+    return reshape(lazyarr, shape, c_order=c_order, **kwargs)
 
 
 # Define a numpy linspace-like function
@@ -3316,10 +3310,7 @@ def linspace_fill(inputs, output, offset):
         # C order is guaranteed, and no reshape is needed
         return lazyarr.compute(**kwargs)
 
-    # In principle, when c_order is False, the intermediate array wouldn't be needed,
-    # but this is faster; see arange() for more details.
-    larr = lazyarr.compute()  # intermediate array
-    return reshape(larr, shape, c_order=c_order, **kwargs)
+    return reshape(lazyarr, shape, c_order=c_order, **kwargs)
 
 
 def eye(N, M=None, k=0, dtype=np.float64, **kwargs: Any):
@@ -3427,10 +3418,12 @@ def iter_fill(inputs, output, offset):
         # C order is guaranteed, and no reshape is needed
         return lazyarr.compute(**kwargs)
 
-    # In principle, when c_order is False, the intermediate array wouldn't be needed,
-    # but this is faster; see arange() for more details.
-    larr = lazyarr.compute()  # intermediate array
-    return reshape(larr, shape, c_order=c_order, **kwargs)
+    # TODO: in principle, the following should work, but tests still fail:
+    # return reshape(lazyarr, shape, c_order=c_order, **kwargs)
+    # Creating a temporary file is a workaround for the issue
+    with tempfile.NamedTemporaryFile(suffix=".b2nd", delete=True) as tmp_file:
+        larr = lazyarr.compute(urlpath=tmp_file.name, mode="w")  # intermediate array
+        return reshape(larr, shape, c_order=c_order, **kwargs)
 
 
 def frombuffer(