Skip to content

Commit 6e12a7d

Browse files
Merge pull request #413 from Blosc/addLazySlice
Add slicing to string lazyexprs
2 parents d9fe0aa + c203601 commit 6e12a7d

File tree

2 files changed

+85
-4
lines changed

2 files changed

+85
-4
lines changed

src/blosc2/lazyexpr.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def ne_evaluate(expression, local_dict=None, **kwargs):
116116
constructors = ("arange", "linspace", "fromiter", "zeros", "ones", "empty", "full", "frombuffer")
117117
# Note that, as reshape is accepted as a method too, it should always come last in the list
118118
constructors += ("reshape",)
119-
reducers = ("sum", "prod", "min", "max", "std", "mean", "var", "any", "all")
119+
reducers = ("sum", "prod", "min", "max", "std", "mean", "var", "any", "all", "slice")
120120

121121
functions = [
122122
"sin",
@@ -541,7 +541,7 @@ def compute_smaller_slice(larger_shape, smaller_shape, larger_slice):
541541

542542
# Define the patterns for validation
543543
validation_patterns = [
544-
r"[\;\[\:]", # Flow control characters
544+
r"[\;]", # Flow control characters
545545
r"(^|[^\w])__[\w]+__($|[^\w])", # Dunder methods
546546
r"\.\b(?!real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|\d*j\b|(sum|prod|min|max|std|mean|var|any|all|where)"
547547
r"\s*\([^)]*\)|[a-zA-Z_]\w*\s*\([^)]*\))", # Attribute patterns
@@ -551,7 +551,20 @@ def compute_smaller_slice(larger_shape, smaller_shape, larger_slice):
551551
_blacklist_re = re.compile("|".join(validation_patterns))
552552

553553
# Define valid method names
554-
valid_methods = {"sum", "prod", "min", "max", "std", "mean", "var", "any", "all", "where", "reshape"}
554+
valid_methods = {
555+
"sum",
556+
"prod",
557+
"min",
558+
"max",
559+
"std",
560+
"mean",
561+
"var",
562+
"any",
563+
"all",
564+
"where",
565+
"reshape",
566+
"slice",
567+
}
555568
valid_methods |= {"int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"}
556569
valid_methods |= {"float32", "float64", "complex64", "complex128"}
557570
valid_methods |= {"bool", "str", "bytes"}
@@ -579,7 +592,7 @@ def validate_expr(expr: str) -> None:
579592
raise ValueError(f"'{expr}' is not a valid expression.")
580593

581594
# Check for invalid characters not covered by the tokenizer
582-
invalid_chars = re.compile(r"[^\w\s+\-*/%().,=<>!&|~^]")
595+
# Whitelist of characters allowed outside quoted strings. Both brackets are
# escaped: an unescaped "]" right after "[" terminates the character class
# early, which turns the rest of the pattern into literal text plus an
# alternation and makes this check match (almost) nothing. ":" is allowed
# because subscripts such as ``a[10:15]`` are now valid in expressions.
invalid_chars = re.compile(r"[^\w\s+\-*/%()\[\].,:=<>!&|~^]")
583596
if invalid_chars.search(skip_quotes) is not None:
584597
invalid_chars = invalid_chars.findall(skip_quotes)
585598
raise ValueError(f"Expression {expr} contains invalid characters: {invalid_chars}")
@@ -731,6 +744,41 @@ def visit_Call(self, node):
731744
return newexpression, newoperands
732745

733746

747+
def convert_to_slice(expression):
    """
    Rewrite every ``[...]`` subscript in an expression as a ``.slice(...)`` call.

    Each bracketed subscript is turned into an index object through ``np.s_``,
    normalised to a tuple, and emitted as ``.slice(<tuple>)`` so that the
    expression uses the ``slice`` method instead of getitem syntax.

    Parameters
    ----------
    expression: str
        Expression string, possibly containing NumPy-style subscripts such as
        ``"a[10:15, 2:9] + 1"``.

    Returns
    -------
    new_expr : str
        The expression with every subscript replaced by ``.slice((...))``.
        Text outside of brackets is copied unchanged.

    Raises
    ------
    ValueError
        If a ``[`` has no matching ``]``, or if a subscript contains a string
        (field access), which is not supported.
    """
    # NOTE(review): the subscript text is handed to eval(). It is evaluated in
    # a minimal namespace (no builtins, no function locals, no module globals)
    # so that names inside the brackets cannot accidentally resolve to internal
    # variables; callers are still expected to validate the expression first.
    eval_globals = {"__builtins__": {}, "np": np, "slice": slice}

    pieces = []
    length = len(expression)
    pos = 0
    while pos < length:
        char = expression[pos]
        if char != "[":
            pieces.append(char)
            pos += 1
            continue

        # Locate the bracket that closes this one, honouring nesting
        depth = 0
        end = -1
        for j in range(pos, length):
            if expression[j] == "[":
                depth += 1
            elif expression[j] == "]":
                depth -= 1
                if depth == 0:
                    end = j
                    break
        if end == -1:
            raise ValueError(f"Unmatched '[' at position {pos} in expression '{expression}'.")

        subscript = expression[pos : end + 1]  # include [ and ]
        slicer = eval(f"np.s_{subscript}", eval_globals, {})
        # Standardise to tuple; scalars (int, Ellipsis, None) must be wrapped too
        slicer = slicer if isinstance(slicer, tuple) else (slicer,)
        if any(isinstance(el, str) for el in slicer):  # handle fields
            raise ValueError("Cannot handle fields for slicing lazy expressions.")
        # use slice so that lazyexpr uses blosc arrays internally
        # (and doesn't decompress according to getitem syntax)
        pieces.append(".slice(" + str(slicer) + ")")
        pos = end + 1
    return "".join(pieces)
780+
781+
734782
class TransformNumpyCalls(ast.NodeTransformer):
735783
def __init__(self):
736784
self.replacements = {}
@@ -2543,6 +2591,7 @@ def find_args(expr):
25432591
def _compute_expr(self, item, kwargs): # noqa: C901
25442592
if any(method in self.expression for method in reducers):
25452593
# We have reductions in the expression (probably coming from a string lazyexpr)
2594+
# Also includes slice
25462595
_globals = get_expr_globals(self.expression)
25472596
lazy_expr = eval(self.expression, _globals, self.operands)
25482597
if not isinstance(lazy_expr, blosc2.LazyExpr):
@@ -2775,6 +2824,7 @@ def save(self, urlpath=None, **kwargs):
27752824
def _new_expr(cls, expression, operands, guess, out=None, where=None, ne_args=None):
27762825
# Validate the expression
27772826
validate_expr(expression)
2827+
expression = convert_to_slice(expression)
27782828
if guess:
27792829
# The expression has been validated, so we can evaluate it
27802830
# in guessing mode to avoid computing reductions

tests/ndarray/test_reductions.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,34 @@ def test_reduction_index():
428428
assert arr[:10].shape == (10,)
429429
assert arr[0].shape == (1,)
430430
assert arr.shape == newarr.shape
431+
432+
433+
def test_slice_lazy():
    """An explicit ``.slice`` call in a string lazyexpr matches eager slicing."""
    dims = (20, 20)
    source = blosc2.linspace(0, 20, num=np.prod(dims), shape=dims)
    lazy = blosc2.lazyexpr("anarr.slice(slice(10,15)) + 1", {"anarr": source})
    computed = lazy.compute()
    expected = source.slice(slice(10, 15))[:] + 1
    np.testing.assert_allclose(computed[:], expected)
439+
440+
441+
def test_slicebrackets_lazy():
    """Bracket subscripts inside a string lazyexpr match eager getitem results."""
    dims = (20, 20)
    source = blosc2.linspace(0, 20, num=np.prod(dims), shape=dims)
    lazy = blosc2.lazyexpr("anarr[10:15] + 1", {"anarr": source})
    computed = lazy.compute()
    np.testing.assert_allclose(computed[:], source[10:15] + 1)

    # Same expression, but evaluated through getitem on the lazy expression
    source = blosc2.linspace(0, 20, num=np.prod(dims), shape=dims)
    lazy = blosc2.lazyexpr("anarr[10:15] + 1", {"anarr": source})
    partial = lazy[:3]
    expected = source[10:15] + 1
    np.testing.assert_allclose(partial, expected[:3])

    # Multi-dimensional subscript
    lazy = blosc2.lazyexpr("anarr[10:15, 2:9] + 1", {"anarr": source})
    computed = lazy.compute()
    np.testing.assert_allclose(computed[:], source[10:15, 2:9] + 1)

    # Chained subscripts
    lazy = blosc2.lazyexpr("anarr[10:15][2:9] + 1", {"anarr": source})
    computed = lazy.compute()
    np.testing.assert_allclose(computed[:], source[10:15][2:9] + 1)

0 commit comments

Comments
 (0)