Skip to content

Commit 98ba424

Browse files
Merge pull request #487 from Blosc/array-protocol
Array protocol
2 parents c4a9bff + b421075 commit 98ba424

File tree

9 files changed

+361
-315
lines changed

9 files changed

+361
-315
lines changed

doc/reference/array.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
.. _Array:
2+
3+
Array
4+
=====
5+
6+
Minimal typing protocol for array-like objects compatible with blosc2.
7+
8+
This protocol describes the basic interface required by blosc2 arrays.
9+
It is implemented by blosc2 classes (:ref:`NDArray`, :ref:`NDField`,
10+
:ref:`LazyArray`, :ref:`C2Array`, :ref:`ProxyNDSource`, ...)
11+
and is compatible with NumPy arrays and other array-like containers
12+
(e.g., PyTorch, TensorFlow, Dask, Zarr).
13+
14+
.. currentmodule:: blosc2
15+
16+
.. autoclass:: Array
17+
18+
:Special Methods:
19+
20+
.. autosummary::
21+
22+
__len__
23+
__getitem__

doc/reference/classes.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@ Main Classes
77
------------
88
.. autosummary::
99

10-
SChunk
1110
NDArray
1211
NDField
1312
LazyArray
1413
C2Array
14+
Array
15+
SChunk
1516
DictStore
1617
TreeStore
1718
EmbedStore
@@ -23,14 +24,15 @@ Main Classes
2324
.. toctree::
2425
:maxdepth: 1
2526

26-
schunk
2727
ndarray
2828
ndfield
29+
lazyarray
30+
c2array
31+
array
32+
schunk
2933
dict_store
3034
tree_store
3135
embed_store
32-
lazyarray
33-
c2array
3436
proxy
3537
proxysource
3638
proxyndsource

src/blosc2/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ def _raise(exc):
392392
)
393393

394394
from .ndarray import (
395+
Array,
395396
NDArray,
396397
NDField,
397398
Operand,

src/blosc2/dict_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def estore(self) -> EmbedStore:
214214
"""Access the underlying EmbedStore."""
215215
return self._estore
216216

217-
def __setitem__(self, key: str, value: np.ndarray | blosc2.NDArray | SChunk | C2Array) -> None:
217+
def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
218218
"""Add a node to the DictStore."""
219219
if isinstance(value, np.ndarray):
220220
value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams)

src/blosc2/embed_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _ensure_capacity(self, needed_bytes: int) -> None:
159159
new_size = max(required_size, int(self._store.shape[0] * 1.5))
160160
self._store.resize((new_size,))
161161

162-
def __setitem__(self, key: str, value: np.ndarray | blosc2.NDArray | SChunk | C2Array) -> None:
162+
def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
163163
"""Add a node to the embed store."""
164164
if self.mode == "r":
165165
raise ValueError("Cannot set items in read-only mode.")

src/blosc2/lazyexpr.py

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
288288
@abstractmethod
289289
def compute(self, item: slice | list[slice] | None = None, **kwargs: Any) -> blosc2.NDArray:
290290
"""
291-
Return an :ref:`NDArray` containing the evaluation of the :ref:`LazyArray`.
291+
Return an :ref:`NDArray` containing the evaluation of the :ref:`LazyArray`.
292292
293293
Parameters
294294
----------
@@ -337,9 +337,9 @@ def compute(self, item: slice | list[slice] | None = None, **kwargs: Any) -> blo
337337
pass
338338

339339
@abstractmethod
340-
def __getitem__(self, item: int | slice | Sequence[slice]) -> blosc2.NDArray:
340+
def __getitem__(self, item: int | slice | Sequence[slice]) -> np.ndarray:
341341
"""
342-
Return a NumPy.ndarray containing the evaluation of the :ref:`LazyArray`.
342+
Return a numpy.ndarray containing the evaluation of the :ref:`LazyArray`.
343343
344344
Parameters
345345
----------
@@ -392,7 +392,7 @@ def save(self, **kwargs: Any) -> None:
392392
393393
Notes
394394
-----
395-
* All the operands of the LazyArray must be Python scalars, :ref:`NDArray`, :ref:`C2Array` or :ref:`Proxy`.
395+
* All the operands of the LazyArray must be Python scalars or :ref:`Array` objects.
396396
* If an operand is a :ref:`Proxy`, keep in mind that Python-Blosc2 will only be able to reopen it as such
397397
if its source is a :ref:`SChunk`, :ref:`NDArray` or a :ref:`C2Array` (see :func:`blosc2.open` notes
398398
section for more info).
@@ -507,14 +507,12 @@ def convert_inputs(inputs):
507507
return []
508508
inputs_ = []
509509
for obj in inputs:
510-
if not isinstance(
511-
obj, np.ndarray | blosc2.NDArray | blosc2.NDField | blosc2.C2Array
512-
) and not np.isscalar(obj):
510+
if not isinstance(obj, blosc2.Array) and not np.isscalar(obj):
513511
try:
514512
obj = np.asarray(obj)
515513
except Exception:
516514
print(
517-
"Inputs not being np.ndarray, NDArray, NDField, C2Array or Python scalar objects"
515+
"Inputs not being np.ndarray, Array or Python scalar objects"
518516
" should be convertible to np.ndarray."
519517
)
520518
raise
@@ -687,9 +685,9 @@ def visit_Call(self, node):
687685

688686
def conserve_functions( # noqa: C901
689687
expression: str,
690-
operands_old: dict[str, blosc2.NDArray | blosc2.LazyExpr],
691-
operands_new: dict[str, blosc2.NDArray | blosc2.LazyExpr],
692-
) -> tuple[str, dict[str, blosc2.NDArray]]:
688+
operands_old: dict[str, blosc2.Array],
689+
operands_new: dict[str, blosc2.Array],
690+
) -> tuple[str, dict[str, blosc2.Array]]:
693691
"""
694692
Given an expression in string form, return its operands.
695693
@@ -2029,7 +2027,7 @@ def chunked_eval( # noqa: C901
20292027
_getitem: bool, optional
20302028
Indicates whether the expression is being evaluated for a getitem operation.
20312029
Default is False.
2032-
_output: NDArray or np.ndarray, optional
2030+
_output: blosc2.Array, optional
20332031
The output array to store the result.
20342032
_ne_args: dict, optional
20352033
Additional arguments to be passed to `numexpr.evaluate()` function.
@@ -3252,7 +3250,7 @@ def info(self):
32523250
def info_items(self):
32533251
inputs = {}
32543252
for key, value in self.inputs_dict.items():
3255-
if isinstance(value, np.ndarray | blosc2.NDArray | blosc2.C2Array):
3253+
if isinstance(value, blosc2.Array):
32563254
inputs[key] = f"<{value.__class__.__name__}> {value.shape} {value.dtype}"
32573255
else:
32583256
inputs[key] = str(value)
@@ -3378,7 +3376,7 @@ def save(self, **kwargs):
33783376

33793377
def lazyudf(
33803378
func: Callable[[tuple, np.ndarray, tuple[int]], None],
3381-
inputs: tuple | list | None,
3379+
inputs: Sequence[Any] | None,
33823380
dtype: np.dtype,
33833381
shape: tuple | list | None = None,
33843382
chunked_eval: bool = True,
@@ -3396,11 +3394,11 @@ def lazyudf(
33963394
in :paramref:`inputs`.
33973395
- `output`: The buffer to be filled as a multidimensional numpy.ndarray.
33983396
- `offset`: The multidimensional offset corresponding to the start of the block being computed.
3399-
inputs: tuple or list or None
3400-
The sequence of inputs. Supported inputs are:
3401-
NumPy.ndarray, :ref:`NDArray`, :ref:`NDField`, :ref:`C2Array`.
3402-
Any other object is supported too, and will be passed as is to the user-defined function.
3403-
If not needed, this can be empty, but `shape` must be provided.
3397+
inputs: Sequence[Any] or None
3398+
The sequence of inputs. Besides objects compliant with the blosc2.Array protocol,
3399+
any other object is supported too, and it will be passed as-is to the
3400+
user-defined function. If not needed, this can be empty, but `shape` must
3401+
be provided.
34043402
dtype: np.dtype
34053403
The resulting ndarray dtype in NumPy format.
34063404
shape: tuple, optional
@@ -3482,9 +3480,9 @@ def seek_operands(names, local_dict=None, global_dict=None, _frame_depth: int =
34823480

34833481

34843482
def lazyexpr(
3485-
expression: str | bytes | LazyExpr | blosc2.NDArray,
3483+
expression: str | bytes | LazyArray | blosc2.NDArray,
34863484
operands: dict | None = None,
3487-
out: blosc2.NDArray | np.ndarray = None,
3485+
out: blosc2.Array = None,
34883486
where: tuple | list | None = None,
34893487
local_dict: dict | None = None,
34903488
global_dict: dict | None = None,
@@ -3496,15 +3494,15 @@ def lazyexpr(
34963494
34973495
Parameters
34983496
----------
3499-
expression: str or bytes or LazyExpr
3500-
The expression to evaluate. This can be any valid expression that can be
3501-
ingested by numexpr. If a LazyExpr is passed, the expression will be
3497+
expression: str or bytes or LazyArray or NDArray
3498+
The expression to evaluate. This can be any valid expression that numexpr
3499+
can ingest. If a LazyExpr is passed, the expression will be
35023500
updated with the new operands.
3503-
operands: dict
3504-
The dictionary with operands. Supported values are NumPy.ndarray,
3505-
Python scalars, :ref:`NDArray`, :ref:`NDField` or :ref:`C2Array` instances.
3501+
operands: dict[str, blosc2.Array | scalar], optional
3502+
The dictionary with operands. Supported values are Python scalars,
3503+
or any instance that is blosc2.Array compliant.
35063504
If None, the operands will be looked up in the local and global dictionaries.
3507-
out: NDArray or np.ndarray, optional
3505+
out: blosc2.Array, optional
35083506
The output array where the result will be stored. If not provided,
35093507
a new NumPy array will be created and returned.
35103508
where: tuple, list, optional
@@ -3633,9 +3631,9 @@ def evaluate(
36333631
ex: str,
36343632
local_dict: dict | None = None,
36353633
global_dict: dict | None = None,
3636-
out: np.ndarray | blosc2.NDArray = None,
3634+
out: blosc2.Array = None,
36373635
**kwargs: Any,
3638-
) -> np.ndarray | blosc2.NDArray:
3636+
) -> blosc2.Array:
36393637
"""
36403638
Evaluate a string expression using the Blosc2 compute engine.
36413639
@@ -3659,15 +3657,15 @@ def evaluate(
36593657
global_dict: dict, optional
36603658
The global dictionary to use when looking for operands in the expression.
36613659
If not provided, the global dictionary of the caller will be used.
3662-
out: NDArray or np.ndarray, optional
3660+
out: blosc2.Array, optional
36633661
The output array where the result will be stored. If not provided,
36643662
a new NumPy array will be created and returned.
36653663
kwargs: Any, optional
36663664
Additional arguments to be passed to `numexpr.evaluate()` function.
36673665
36683666
Returns
36693667
-------
3670-
out: NumPy or NDArray
3668+
out: blosc2.Array
36713669
The result of the expression evaluation. If out is provided, the result
36723670
will be stored in out and returned at the same time.
36733671

src/blosc2/linalg.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from collections.abc import Sequence
1616

1717

18-
def matmul(x1: blosc2.NDArray | np.ndarray, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
18+
def matmul(x1: blosc2.Array, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
1919
"""
2020
Computes the matrix product between two Blosc2 NDArrays.
2121
@@ -417,7 +417,7 @@ def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) ->
417417

418418

419419
def permute_dims(
420-
arr: blosc2.NDArray | np.ndarray, axes: tuple[int] | list[int] | None = None, **kwargs: Any
420+
arr: blosc2.Array, axes: tuple[int] | list[int] | None = None, **kwargs: Any
421421
) -> blosc2.NDArray:
422422
"""
423423
Permutes the axes (dimensions) of an array.
@@ -566,7 +566,7 @@ def transpose(x, **kwargs: Any) -> blosc2.NDArray:
566566
return permute_dims(x, **kwargs)
567567

568568

569-
def matrix_transpose(arr: blosc2.NDArray | np.ndarray, **kwargs: Any) -> blosc2.NDArray:
569+
def matrix_transpose(arr: blosc2.Array, **kwargs: Any) -> blosc2.NDArray:
570570
"""
571571
Transposes a matrix (or a stack of matrices).
572572

0 commit comments

Comments
 (0)