Skip to content

Commit ebdd8ef

Browse files
committed
Cleanup, add documentation
1 parent b461889 commit ebdd8ef

File tree

3 files changed

+30
-17
lines changed

3 files changed

+30
-17
lines changed

ADD_LAZYFUNCS.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Adding (lazy) functions
2+
3+
Once you have written a (public API) function in Blosc2, it is important to:
4+
* Import it from the relevant module in the ``__init__.py`` file
5+
* Add it to the list of functions in ``__all__`` in the ``__init__.py`` file
6+
* If it is present in NumPy, add it to the relevant dictionary (``local_ufunc_map``, ``ufunc_map`` or ``ufunc_map_1param``) in ``ndarray.py``
7+
8+
Finally, you also need to deal with it correctly within ``shape_utils.py``.
9+
10+
If the function does not change the shape of the output, simply add it to ``elementwise_funcs`` and you're done.
11+
12+
If the function _does_ change the shape of the output, it is likely either a reduction, a constructor, or a linear algebra function and so should be added to one of those lists (``reducers``, ``constructors`` or ``linalg_funcs``). If the function is a reduction, unless you need to handle an argument that is neither ``axis`` nor ``keepdims``, you don't need to do anything else.
13+
If your function is a constructor, you need to ensure it is handled appropriately within the ``visit_Call`` function (if it has a shape argument this is easy: just add it to the list of functions that contains ``zeros``, ``zeros_like``, etc.).
14+
15+
For linear algebra functions it is likely you will have to write a bespoke shape handler within the ``linalg_shape`` function. There is also a list ``linalg_attrs`` for attributes which change the shape (currently only ``T`` and ``mT``), should you need to add one. If you do add an attribute, you will probably also need to edit the ``validation_patterns`` list in the ``lazyexpr.py`` file so that the new attribute passes expression validation: just extend the negative lookahead that begins "(?!real|imag|T|mT|(" with the new attribute name.
16+
17+
After this, the imports at the top of ``lazyexpr.py`` should handle things; an ``eager_funcs`` list is defined there to handle eager execution of functions which change the output shape. Finally, in order to handle name changes between NumPy versions 1 and 2, it may be necessary to add aliases for functions within the blocks defined by ``if NUMPY_GE_2_0:`` in ``lazyexpr.py`` and ``ndarray.py``.

src/blosc2/lazyexpr.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
ufunc_map_1param,
5252
)
5353

54-
from .shape_utils import constructors, elementwise_funcs, infer_shape, lin_alg_attrs, lin_alg_funcs, reducers
54+
from .shape_utils import constructors, elementwise_funcs, infer_shape, linalg_attrs, linalg_funcs, reducers
5555

5656
if not blosc2.IS_WASM:
5757
import numexpr
@@ -157,9 +157,9 @@ def ne_evaluate(expression, local_dict=None, **kwargs):
157157
"S": np.str_,
158158
"V": np.bytes_,
159159
}
160-
blosc2_funcs = constructors + lin_alg_funcs + elementwise_funcs + reducers
160+
blosc2_funcs = constructors + linalg_funcs + elementwise_funcs + reducers
161161
# functions that have to be evaluated before chunkwise lazyexpr machinery
162-
eager_funcs = lin_alg_funcs + reducers + ["slice"] + lin_alg_attrs
162+
eager_funcs = linalg_funcs + reducers + ["slice"] + ["." + attr for attr in linalg_attrs]
163163
# Gather all callable functions in numpy
164164
numpy_funcs = {
165165
name
@@ -569,7 +569,7 @@ def compute_smaller_slice(larger_shape, smaller_shape, larger_slice):
569569
validation_patterns = [
570570
r"[\;]", # Flow control characters
571571
r"(^|[^\w])__[\w]+__($|[^\w])", # Dunder methods
572-
r"\.\b(?!real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|\d*j\b|(sum|prod|min|max|std|mean|var|any|all|where)"
572+
r"\.\b(?!real|imag|T|mT|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|\d*j\b|(sum|prod|min|max|std|mean|var|any|all|where)"
573573
r"\s*\([^)]*\)|[a-zA-Z_]\w*\s*\([^)]*\))", # Attribute patterns
574574
]
575575

@@ -595,10 +595,8 @@ def compute_smaller_slice(larger_shape, smaller_shape, larger_slice):
595595
valid_methods |= {"float32", "float64", "complex64", "complex128"}
596596
valid_methods |= {"bool", "str", "bytes"}
597597
valid_methods |= {
598-
name
599-
for name in dir(blosc2.NDArray)
600-
if callable(getattr(blosc2.NDArray, name)) and not name.startswith("_")
601-
}
598+
name for name in dir(blosc2.NDArray) if not name.startswith("_")
599+
} # allow attributes and methods
602600

603601

604602
def validate_expr(expr: str) -> None:
@@ -621,9 +619,7 @@ def validate_expr(expr: str) -> None:
621619
# Check for forbidden patterns
622620
forbiddens = _blacklist_re.search(skip_quotes)
623621
if forbiddens is not None:
624-
i = forbiddens.span()[0]
625-
if expr[i : i + 2] != ".T" and expr[i : i + 3] != ".mT": # allow tranpose methods
626-
raise ValueError(f"'{expr}' is not a valid expression.")
622+
raise ValueError(f"'{expr}' is not a valid expression.")
627623

628624
# Check for invalid characters not covered by the tokenizer
629625
invalid_chars = re.compile(r"[^\w\s+\-*/%()[].,=<>!&|~^]")

src/blosc2/shape_utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
"zeros_like",
8080
]
8181

82-
lin_alg_funcs = [
82+
linalg_funcs = [
8383
"concat",
8484
"diagonal",
8585
"expand_dims",
@@ -94,7 +94,7 @@
9494
"vecdot",
9595
]
9696

97-
lin_alg_attrs = ["T", "mT"]
97+
linalg_attrs = ["T", "mT"]
9898
reducers = ["sum", "prod", "min", "max", "std", "mean", "var", "any", "all", "count_nonzero"]
9999

100100
# All the available constructors and reducers necessary for the (string) expression evaluator
@@ -317,7 +317,7 @@ def elementwise(*args):
317317

318318

319319
# --- Function registry ---
320-
FUNCTIONS = { # ignore out arg
320+
REDUCTIONS = { # ignore out arg
321321
func: lambda x, axis=None, keepdims=False, out=None: reduce_shape(x, axis, keepdims)
322322
for func in reducers
323323
# any unknown function will default to elementwise
@@ -391,7 +391,7 @@ def visit_Call(self, node): # noqa : C901
391391
kwargs[kw.arg] = self._lookup_value(kw.value)
392392

393393
# ------- handle linear algebra ---------------
394-
if base_name in lin_alg_funcs:
394+
if base_name in linalg_funcs:
395395
return linalg_shape(base_name, args, kwargs)
396396

397397
# ------- handle constructors ---------------
@@ -484,8 +484,8 @@ def visit_Call(self, node): # noqa : C901
484484
slices = [self._eval_slice(slice_arg)]
485485
return slice_shape(obj_shape, slices)
486486

487-
if base_name in FUNCTIONS:
488-
return FUNCTIONS[base_name](*args, **kwargs)
487+
if base_name in REDUCTIONS:
488+
return REDUCTIONS[base_name](*args, **kwargs)
489489

490490
shapes = [s for s in args if s is not None]
491491
if base_name not in elementwise_funcs:

0 commit comments

Comments
 (0)