Skip to content

Commit cb6c751

Browse files
committed
New version for reducing memory consumption
1 parent b5412a1 commit cb6c751

File tree

2 files changed

+10
-16
lines changed

2 files changed

+10
-16
lines changed

src/blosc2/lazyexpr.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2852,6 +2852,7 @@ def compute(self, item=None, **kwargs):
28522852

28532853
def __getitem__(self, item):
28542854
if self.chunked_eval:
2855+
# TODO: this allocates a full-size output array, which can potentially consume a lot of memory
28552856
output = np.empty(self.shape, self.dtype)
28562857
# It is important to pass kwargs here, because chunks can be used internally
28572858
chunked_eval(self.func, self.inputs_dict, item, _getitem=True, _output=output, **self.kwargs)

src/blosc2/ndarray.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import builtins
1212
import inspect
1313
import math
14+
import tempfile
1415
from collections import OrderedDict, namedtuple
1516
from functools import reduce
1617
from itertools import product
@@ -3246,14 +3247,7 @@ def arange_fill(inputs, output, offset):
32463247
# C order is guaranteed, and no reshape is needed
32473248
return lazyarr.compute(**kwargs)
32483249

3249-
# In principle, when c_order is False, this would be enough:
3250-
# return reshape(lazyarr, shape, c_order=c_order, **kwargs)
3251-
# so that an intermediate NDArray wouldn't be needed, which is more memory efficient.
3252-
# However, benchmarks show that performance is better with the approach below.
3253-
# Incidentally, not requiring C order can be quite illustrative for the user to
3254-
# understand how the process of computing lazy arrays (and chunking) works.
3255-
larr = lazyarr.compute() # intermediate array
3256-
return reshape(larr, shape, c_order=c_order, **kwargs)
3250+
return reshape(lazyarr, shape, c_order=c_order, **kwargs)
32573251

32583252

32593253
# Define a numpy linspace-like function
@@ -3316,10 +3310,7 @@ def linspace_fill(inputs, output, offset):
33163310
# C order is guaranteed, and no reshape is needed
33173311
return lazyarr.compute(**kwargs)
33183312

3319-
# In principle, when c_order is False, the intermediate array wouldn't be needed,
3320-
# but this is faster; see arange() for more details.
3321-
larr = lazyarr.compute() # intermediate array
3322-
return reshape(larr, shape, c_order=c_order, **kwargs)
3313+
return reshape(lazyarr, shape, c_order=c_order, **kwargs)
33233314

33243315

33253316
def eye(N, M=None, k=0, dtype=np.float64, **kwargs: Any):
@@ -3427,10 +3418,12 @@ def iter_fill(inputs, output, offset):
34273418
# C order is guaranteed, and no reshape is needed
34283419
return lazyarr.compute(**kwargs)
34293420

3430-
# In principle, when c_order is False, the intermediate array wouldn't be needed,
3431-
# but this is faster; see arange() for more details.
3432-
larr = lazyarr.compute() # intermediate array
3433-
return reshape(larr, shape, c_order=c_order, **kwargs)
3421+
# TODO: in principle, the following call should work, but tests still fail:
3422+
# return reshape(lazyarr, shape, c_order=c_order, **kwargs)
3423+
# As a workaround, compute the intermediate array into a temporary file
3424+
with tempfile.NamedTemporaryFile(suffix=".b2nd", delete=True) as tmp_file:
3425+
larr = lazyarr.compute(urlpath=tmp_file.name, mode="w") # intermediate array
3426+
return reshape(larr, shape, c_order=c_order, **kwargs)
34343427

34353428

34363429
def frombuffer(

0 commit comments

Comments
 (0)