Skip to content

Commit 132a9ca

Browse files
committed
Rename slices_eval2() to slices_eval() and remove the old implementation of the latter
1 parent 22b023b commit 132a9ca

File tree

1 file changed

+2
-265
lines changed

1 file changed

+2
-265
lines changed

src/blosc2/lazyexpr.py

Lines changed: 2 additions & 265 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,263 +1376,6 @@ def slices_eval( # noqa: C901
13761376
13771377
This is also flexible enough to work with operands of different shapes.
13781378
1379-
Parameters
1380-
----------
1381-
expression: str or callable
1382-
The expression or user-defined (udf) to evaluate.
1383-
operands: dict
1384-
A dictionary containing the operands for the expression.
1385-
getitem: bool, optional
1386-
Indicates whether the expression is being evaluated for a getitem operation.
1387-
_slice: slice, list of slices, optional
1388-
If provided, only the chunks that intersect with this slice
1389-
will be evaluated.
1390-
kwargs: Any, optional
1391-
Additional keyword arguments that are supported by the :func:`empty` constructor.
1392-
1393-
Returns
1394-
-------
1395-
:ref:`NDArray` or np.ndarray
1396-
The output array.
1397-
"""
1398-
out: blosc2.NDArray | None = kwargs.pop("_output", None)
1399-
ne_args: dict = kwargs.pop("_ne_args", {})
1400-
if ne_args is None:
1401-
ne_args = {}
1402-
chunks = kwargs.get("chunks")
1403-
where: dict | None = kwargs.pop("_where_args", None)
1404-
_indices = kwargs.pop("_indices", False)
1405-
if _indices and (not where or len(where) != 1):
1406-
raise NotImplementedError("Indices can only be used with one where condition")
1407-
_order = kwargs.pop("_order", None)
1408-
if _order is not None and not isinstance(_order, list):
1409-
# Always use a list for _order
1410-
_order = [_order]
1411-
1412-
dtype = kwargs.pop("dtype", None)
1413-
if out is None:
1414-
# Compute the shape and chunks of the output array, including broadcasting
1415-
shape = compute_broadcast_shape(operands.values())
1416-
else:
1417-
shape = out.shape
1418-
1419-
# We need to keep the original _slice arg, for allowing a final getitem (if necessary)
1420-
orig_slice = _slice
1421-
1422-
if chunks is None:
1423-
# Either out, or operand with `chunks`, can be used to get the chunks
1424-
operands_ = [o for o in operands.values() if hasattr(o, "chunks") and o.shape == shape]
1425-
if out is not None and hasattr(out, "chunks"):
1426-
chunks = out.chunks
1427-
elif len(operands_) > 0:
1428-
# Use the first operand with chunks to get the necessary chunking information
1429-
chunks = operands_[0].chunks
1430-
else:
1431-
# Typically, we enter here when using UDFs, and out is a NumPy array.
1432-
# Use operands to get the shape and chunks
1433-
# operand will be a 'fake' NDArray just to get the necessary chunking information
1434-
temp = blosc2.empty(shape, dtype=dtype)
1435-
chunks = temp.chunks
1436-
del temp
1437-
1438-
# Get the indexes for chunks
1439-
chunks_idx, nchunks = get_chunks_idx(shape, chunks)
1440-
# The starting point for the indices of the inputs
1441-
leninputs = compute_start_index(shape, orig_slice) if orig_slice is not None else 0
1442-
lenout = 0
1443-
behaved = False
1444-
indices_ = None
1445-
chunk_indices = None
1446-
dtype_ = np.int64 if _indices else dtype
1447-
if _order is not None:
1448-
# Get the dtype of the array to sort
1449-
dtype_ = operands["_where_x"].dtype
1450-
# Now, use only the fields that are necessary for the sorting
1451-
dtype_ = np.dtype([(f, dtype_[f]) for f in _order])
1452-
1453-
# Iterate over the operands and get the chunks
1454-
chunk_operands = {}
1455-
for nchunk in range(nchunks):
1456-
coords = tuple(np.unravel_index(nchunk, chunks_idx))
1457-
# Calculate the shape of the (chunk) slice_ (specially at the end of the array)
1458-
slice_ = tuple(
1459-
slice(c * s, min((c + 1) * s, shape[i]))
1460-
for i, (c, s) in enumerate(zip(coords, chunks, strict=True))
1461-
)
1462-
# Check whether current slice_ intersects with _slice
1463-
checker = _slice.item() if hasattr(_slice, "item") else _slice # can't use != when _slice is np.int
1464-
if checker is not None and checker != ():
1465-
# Ensure that _slice is of type slice
1466-
key = ndindex.ndindex(_slice).expand(shape).raw
1467-
_slice = tuple(k if isinstance(k, slice) else slice(k, k + 1, None) for k in key)
1468-
# Ensure that slices do not have any None as start or stop
1469-
_slice = tuple(slice(s.start or 0, s.stop or shape[i], s.step) for i, s in enumerate(_slice))
1470-
slice_ = tuple(slice(s.start or 0, s.stop or shape[i], s.step) for i, s in enumerate(slice_))
1471-
intersects = do_slices_intersect(_slice, slice_)
1472-
if not intersects:
1473-
continue
1474-
# Compute the part of the slice_ that intersects with _slice
1475-
slice_ = tuple(
1476-
slice(max(s1.start, s2.start), min(s1.stop, s2.stop))
1477-
for s1, s2 in zip(slice_, _slice, strict=True)
1478-
)
1479-
slice_shape = tuple(s.stop - s.start for s in slice_)
1480-
len_chunk = math.prod(slice_shape)
1481-
1482-
# Get the starts and stops for the slice
1483-
starts = [s.start if s.start is not None else 0 for s in slice_]
1484-
stops = [s.stop if s.stop is not None else sh for s, sh in zip(slice_, slice_shape, strict=True)]
1485-
1486-
# Get the slice of each operand
1487-
for key, value in operands.items():
1488-
if np.isscalar(value):
1489-
chunk_operands[key] = value
1490-
continue
1491-
if value.shape == ():
1492-
chunk_operands[key] = value[()]
1493-
continue
1494-
if check_smaller_shape(value, shape, slice_shape):
1495-
# We need to fetch the part of the value that broadcasts with the operand
1496-
smaller_slice = compute_smaller_slice(shape, value.shape, slice_)
1497-
chunk_operands[key] = value[smaller_slice]
1498-
continue
1499-
# If key is in operands, we can reuse the buffer
1500-
if (
1501-
key in chunk_operands
1502-
and slice_shape == chunk_operands[key].shape
1503-
and isinstance(value, blosc2.NDArray)
1504-
):
1505-
value.get_slice_numpy(chunk_operands[key], (starts, stops))
1506-
continue
1507-
1508-
chunk_operands[key] = value[slice_]
1509-
1510-
# Evaluate the expression using chunks of operands
1511-
1512-
if callable(expression):
1513-
result = np.empty(slice_shape, dtype=out.dtype)
1514-
# Call the udf directly and use result as the output array
1515-
offset = tuple(s.start for s in slice_)
1516-
expression(tuple(chunk_operands.values()), result, offset=offset)
1517-
out[slice_] = result
1518-
continue
1519-
1520-
if where is None:
1521-
result = ne_evaluate(expression, chunk_operands, **ne_args)
1522-
else:
1523-
# Apply the where condition (in result)
1524-
if len(where) == 2:
1525-
# x = chunk_operands["_where_x"]
1526-
# y = chunk_operands["_where_y"]
1527-
# result = np.where(result, x, y)
1528-
# numexpr is a bit faster than np.where, and we can fuse operations in this case
1529-
new_expr = f"where({expression}, _where_x, _where_y)"
1530-
result = ne_evaluate(new_expr, chunk_operands, **ne_args)
1531-
elif len(where) == 1:
1532-
result = ne_evaluate(expression, chunk_operands, **ne_args)
1533-
if _indices or _order:
1534-
# Return indices only makes sense when the where condition is a tuple with one element
1535-
# and result is a boolean array
1536-
x = chunk_operands["_where_x"]
1537-
if len(x.shape) > 1:
1538-
raise ValueError("indices() and sort() only support 1D arrays")
1539-
if result.dtype != np.bool_:
1540-
raise ValueError("indices() and sort() only support bool conditions")
1541-
indices = np.arange(leninputs, leninputs + len_chunk, dtype=np.int64).reshape(
1542-
slice_shape
1543-
)
1544-
if _order:
1545-
# We need to cumulate all the fields in _order, as well as indices
1546-
chunk_indices = indices[result]
1547-
result = x[_order][result]
1548-
else:
1549-
result = indices[result]
1550-
leninputs += len_chunk
1551-
else:
1552-
x = chunk_operands["_where_x"]
1553-
result = x[result]
1554-
else:
1555-
raise ValueError("The where condition must be a tuple with one or two elements")
1556-
1557-
if out is None:
1558-
shape_ = shape
1559-
if where is not None and len(where) < 2:
1560-
# The result is a linear array
1561-
shape_ = math.prod(shape)
1562-
if getitem or _order:
1563-
out = np.empty(shape_, dtype=dtype_)
1564-
if _order:
1565-
indices_ = np.empty(shape_, dtype=np.int64)
1566-
else:
1567-
if "chunks" not in kwargs and (where is None or len(where) == 2):
1568-
# Let's use the same chunks as the first operand (it could have been automatic too)
1569-
out = blosc2.empty(shape_, chunks=chunks, dtype=dtype_, **kwargs)
1570-
elif "chunks" in kwargs and (where is not None and len(where) < 2 and len(shape_) > 1):
1571-
# Remove the chunks argument if the where condition is not a tuple with two elements
1572-
kwargs.pop("chunks")
1573-
out = blosc2.empty(shape_, dtype=dtype_, **kwargs)
1574-
else:
1575-
out = blosc2.empty(shape_, dtype=dtype_, **kwargs)
1576-
# Check if the in out partitions are well-behaved (i.e. no padding)
1577-
behaved = blosc2.are_partitions_behaved(out.shape, out.chunks, out.blocks)
1578-
1579-
if where is None or len(where) == 2:
1580-
if behaved and result.shape == out.chunks and result.dtype == out.dtype:
1581-
# Fast path
1582-
out.schunk.update_data(nchunk, result, copy=False)
1583-
else:
1584-
out[slice_] = result
1585-
elif len(where) == 1:
1586-
lenres = len(result)
1587-
out[lenout : lenout + lenres] = result
1588-
if _order is not None:
1589-
indices_[lenout : lenout + lenres] = chunk_indices
1590-
lenout += lenres
1591-
else:
1592-
raise ValueError("The where condition must be a tuple with one or two elements")
1593-
1594-
if where is not None and len(where) < 2: # Don't need to take orig_slice since filled up from 0 index
1595-
if _order is not None:
1596-
# argsort the result following _order
1597-
new_order = np.argsort(out[:lenout])
1598-
# And get the corresponding indices in array
1599-
out = indices_[new_order]
1600-
# Cap the output array to the actual length
1601-
if isinstance(out, np.ndarray):
1602-
out = out[:lenout]
1603-
else:
1604-
out.resize((lenout,))
1605-
1606-
else: # Need to take orig_slice since filled up array according to slice_ for each chunk
1607-
if orig_slice is not None:
1608-
if isinstance(out, np.ndarray):
1609-
out = out[orig_slice]
1610-
if _order is not None:
1611-
indices_ = indices_[orig_slice]
1612-
elif isinstance(out, blosc2.NDArray):
1613-
# It *seems* better to choose an automatic chunks and blocks for the output array
1614-
# out = out.slice(orig_slice, chunks=out.chunks, blocks=out.blocks)
1615-
out = out.slice(orig_slice)
1616-
else:
1617-
raise ValueError("The output array is not a NumPy array or a NDArray")
1618-
1619-
return out
1620-
1621-
1622-
def slices_eval2( # noqa: C901
1623-
expression: str | Callable[[tuple, np.ndarray, tuple[int]], None],
1624-
operands: dict,
1625-
getitem: bool,
1626-
_slice=None,
1627-
**kwargs,
1628-
) -> blosc2.NDArray | np.ndarray:
1629-
"""Evaluate the expression in chunks of operands.
1630-
1631-
This function can handle operands with different chunk shapes and
1632-
can evaluate only a slice of the output array if needed.
1633-
1634-
This is also flexible enough to work with operands of different shapes.
1635-
16361379
Parameters
16371380
----------
16381381
expression: str or callable
@@ -1674,7 +1417,6 @@ def slices_eval2( # noqa: C901
16741417
# Compute the shape and chunks of the output array, including broadcasting
16751418
shape = compute_broadcast_shape(operands.values())
16761419
if _slice is not None:
1677-
# print("shape abans:", shape)
16781420
# Remove the step parts from the slice, as code below does not support it
16791421
# First ensure _slice is a tuple, even if it's a single slice
16801422
_slice_ = _slice if isinstance(_slice, tuple) else (_slice,)
@@ -1686,7 +1428,6 @@ def slices_eval2( # noqa: C901
16861428
for i, s in enumerate(_slice_)
16871429
)
16881430
shape_slice = compute_slice_shape(shape, _slice_, dont_squeeze=True)
1689-
# print("shape despres:", shape_slice)
16901431
else:
16911432
shape = out.shape
16921433

@@ -2314,9 +2055,7 @@ def chunked_eval( # noqa: C901
23142055
if getitem and (where is None or len(where) == 2) and not callable(expression):
23152056
# If we are using getitem, we can still use some optimizations
23162057
return slices_eval_getitem(expression, operands, _slice=item, **kwargs)
2317-
# return slices_eval(expression, operands, getitem=getitem, _slice=item, **kwargs)
2318-
# The next is an improved version of slices_eval that consumes less memory
2319-
return slices_eval2(expression, operands, getitem=getitem, _slice=item, **kwargs)
2058+
return slices_eval(expression, operands, getitem=getitem, _slice=item, **kwargs)
23202059

23212060
if fast_path:
23222061
if getitem:
@@ -2330,9 +2069,7 @@ def chunked_eval( # noqa: C901
23302069
# a blosc2.NDArray
23312070
return fast_eval(expression, operands, getitem=False, **kwargs)
23322071

2333-
# res = slices_eval(expression, operands, getitem=getitem, _slice=item, **kwargs)
2334-
# The next is an improved version of slices_eval that consumes less memory
2335-
res = slices_eval2(expression, operands, getitem=getitem, _slice=item, **kwargs)
2072+
res = slices_eval(expression, operands, getitem=getitem, _slice=item, **kwargs)
23362073

23372074
finally:
23382075
# Deactivate cache for NDField instances

0 commit comments

Comments
 (0)