Skip to content

Commit 6cbb5cb

Browse files
committed
Add an eye constructor
1 parent 1e9a85e commit 6cbb5cb

File tree

6 files changed

+130
-16
lines changed

6 files changed

+130
-16
lines changed

doc/reference/ndarray.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,5 @@ Constructors
8282
full
8383
arange
8484
linspace
85+
eye
8586
reshape
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#######################################################################
2+
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under a BSD-style license (found in the
6+
# LICENSE file in the root directory of this source tree)
7+
#######################################################################
8+
9+
# This example shows how to use the `eye()` constructor to create a blosc2 array.
10+
11+
import math
12+
from time import time
13+
14+
import numpy as np
15+
16+
import blosc2
17+
18+
N = 20_000


def _timed_eye(shape):
    """Create an int8 blosc2 eye array of `shape`, printing timing and storage stats.

    Returns the created array so the caller can inspect it.
    """
    print(f"*** Creating a blosc2 eye array with shape: {shape} ***")
    t0 = time()
    arr = blosc2.eye(*shape, dtype=np.int8)
    # Take a single timestamp right after construction, so that the reported
    # time and the reported throughput are derived from the same measurement.
    elapsed = time() - t0
    cratio = arr.schunk.nbytes / arr.schunk.cbytes
    print(
        f"Time: {elapsed:.3f} s ({math.prod(shape) / elapsed / 1e6:.2f} M/s)"
        f"\tStorage required: {arr.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)"
    )
    return arr


shape = (N, N)
a = _timed_eye(shape)
# Slicing the first axis of a 2-D array selects whole rows
print(f"Last 3 rows:\n{a[-3:]}")

# You can create rectangular arrays too
shape = (N, N * 5)
a = _timed_eye(shape)
print(f"First 3 rows:\n{a[:3]}")
42+
43+
44+
# In conclusion, you can use blosc2 eye() to create blosc2 arrays requiring much less storage
45+
# than numpy arrays.

src/blosc2/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ class Tuner(Enum):
215215
are_partitions_behaved,
216216
arange,
217217
linspace,
218+
eye,
218219
asarray,
219220
indices,
220221
sort,

src/blosc2/lazyexpr.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from enum import Enum
2424
from pathlib import Path
2525
from queue import Empty, Queue
26-
from typing import TYPE_CHECKING
26+
from typing import TYPE_CHECKING, Any
2727

2828
from numpy.exceptions import ComplexWarning
2929

@@ -197,7 +197,7 @@ def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
197197
pass
198198

199199
@abstractmethod
200-
def compute(self, item: slice | list[slice] | None = None, **kwargs: dict) -> blosc2.NDArray:
200+
def compute(self, item: slice | list[slice] | None = None, **kwargs: Any) -> blosc2.NDArray:
201201
"""
202202
Return an :ref:`NDArray` containing the evaluation of the :ref:`LazyArray`.
203203
@@ -207,7 +207,7 @@ def compute(self, item: slice | list[slice] | None = None, **kwargs: dict) -> bl
207207
If not None, only the chunks that intersect with the slices
208208
in items will be evaluated.
209209
210-
kwargs: dict, optional
210+
kwargs: Any, optional
211211
Keyword arguments that are supported by the :func:`empty` constructor.
212212
These arguments will be set in the resulting :ref:`NDArray`.
213213
@@ -284,13 +284,13 @@ def __getitem__(self, item: int | slice | Sequence[slice]) -> blosc2.NDArray:
284284
pass
285285

286286
@abstractmethod
287-
def save(self, **kwargs: dict) -> None:
287+
def save(self, **kwargs: Any) -> None:
288288
"""
289289
Save the :ref:`LazyArray` on disk.
290290
291291
Parameters
292292
----------
293-
kwargs: dict, optional
293+
kwargs: Any, optional
294294
Keyword arguments that are supported by the :func:`empty` constructor.
295295
The `urlpath` must always be provided.
296296
@@ -384,7 +384,7 @@ def info(self) -> InfoReporter:
384384

385385

386386
def convert_inputs(inputs):
387-
if len(inputs) == 0:
387+
if not inputs or len(inputs) == 0:
388388
return []
389389
inputs_ = []
390390
for obj in inputs:
@@ -595,7 +595,10 @@ def validate_inputs(inputs: dict, out=None) -> tuple: # noqa: C901
595595
"You really want to pass at least one input or one output for building a LazyArray."
596596
" Maybe you want blosc2.empty() instead?"
597597
)
598-
return out.shape, out.chunks, out.blocks, True
598+
if isinstance(out, blosc2.NDArray):
599+
return out.shape, out.chunks, out.blocks, True
600+
else:
601+
return out.shape, None, None, True
599602

600603
inputs = [input for input in inputs.values() if hasattr(input, "shape") and input is not np]
601604
shape = compute_broadcast_shape(inputs)
@@ -878,7 +881,7 @@ def fast_eval( # noqa: C901
878881
getitem: bool, optional
879882
Indicates whether the expression is being evaluated for a getitem operation or eval().
880883
Default is False.
881-
kwargs: dict, optional
884+
kwargs: Any, optional
882885
Additional keyword arguments supported by the :func:`empty` constructor.
883886
884887
Returns
@@ -892,6 +895,9 @@ def fast_eval( # noqa: C901
892895
if isinstance(out, blosc2.NDArray):
893896
# If 'out' has been passed, and is a NDArray, use it as the base array
894897
basearr = out
898+
elif isinstance(out, np.ndarray):
899+
# If 'out' is a NumPy array, create a NDArray with the same shape and dtype
900+
basearr = blosc2.empty(out.shape, dtype=out.dtype, **kwargs)
895901
else:
896902
# Otherwise, find the operand with the 'chunks' attribute and the longest shape
897903
operands_with_chunks = [o for o in operands.values() if hasattr(o, "chunks")]
@@ -1044,7 +1050,7 @@ def slices_eval( # noqa: C901
10441050
_slice: slice, list of slices, optional
10451051
If provided, only the chunks that intersect with this slice
10461052
will be evaluated.
1047-
kwargs: dict, optional
1053+
kwargs: Any, optional
10481054
Additional keyword arguments that are supported by the :func:`empty` constructor.
10491055
10501056
Returns
@@ -1293,7 +1299,7 @@ def reduce_slices( # noqa: C901
12931299
_slice: slice, list of slices, optional
12941300
If provided, only the chunks that intersect with this slice
12951301
will be evaluated.
1296-
kwargs: dict, optional
1302+
kwargs: Any, optional
12971303
Additional keyword arguments supported by the :func:`empty` constructor.
12981304
12991305
Returns
@@ -1530,7 +1536,7 @@ def chunked_eval( # noqa: C901
15301536
A dictionary containing the operands for the expression.
15311537
item: int, slice or sequence of slices, optional
15321538
The slice(s) to be retrieved. Note that step parameter is not honored yet.
1533-
kwargs: dict, optional
1539+
kwargs: Any, optional
15341540
Additional keyword arguments supported by the :func:`empty` constructor. In addition,
15351541
the following keyword arguments are supported:
15361542
_getitem: bool, optional
@@ -2680,11 +2686,11 @@ def save(self, **kwargs):
26802686

26812687
def lazyudf(
26822688
func: Callable[[tuple, np.ndarray, tuple[int]], None],
2683-
inputs: tuple | list,
2689+
inputs: tuple | list | None,
26842690
dtype: np.dtype,
2685-
shape: tuple[int] | None = None,
2691+
shape: tuple | list | None = None,
26862692
chunked_eval: bool = True,
2687-
**kwargs: dict,
2693+
**kwargs: Any,
26882694
) -> LazyUDF:
26892695
"""
26902696
Get a LazyUDF from a python user-defined function.
@@ -2698,7 +2704,7 @@ def lazyudf(
26982704
in :paramref:`inputs`.
26992705
- `output`: The buffer to be filled as a multidimensional numpy.ndarray.
27002706
- `offset`: The multidimensional offset corresponding to the start of the block being computed.
2701-
inputs: tuple or list
2707+
inputs: tuple or list or None
27022708
The sequence of inputs. Supported inputs are:
27032709
NumPy.ndarray, :ref:`NDArray`, :ref:`NDField`, :ref:`C2Array`.
27042710
Any other object is supported too, and will be passed as is to the user-defined function.
@@ -2709,7 +2715,7 @@ def lazyudf(
27092715
The shape of the resulting array. If None, the shape will be guessed from inputs.
27102716
chunked_eval: bool, optional
27112717
Whether to evaluate the function in chunks or not (blocks).
2712-
kwargs: dict, optional
2718+
kwargs: Any, optional
27132719
Keyword arguments that are supported by the :func:`empty` constructor.
27142720
These arguments will be used by the :meth:`LazyArray.__getitem__` and
27152721
:meth:`LazyArray.eval` methods. The

src/blosc2/ndarray.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3200,6 +3200,55 @@ def linspace_fill(inputs, output, offset):
32003200
return reshape(larr, shape, c_order=c_order, **kwargs)
32013201

32023202

3203+
def eye(N, M=None, k=0, dtype=np.float64, **kwargs: Any):
    """Return a 2-D array with ones on the diagonal and zeros elsewhere.

    Parameters
    ----------
    N: int
        Number of rows in the output.
    M: int, optional
        Number of columns in the output. If None, defaults to `N`.
    k: int, optional
        Index of the diagonal: 0 (the default) refers to the main diagonal,
        a positive value refers to an upper diagonal, and a negative value
        to a lower diagonal.
    dtype: np.dtype or str
        The data type of the array elements in NumPy format. Default is `np.float64`.
    kwargs: Any, optional
        Keyword arguments forwarded to the ``compute()`` call that materializes
        the array (e.g. `chunks`). They are not used when called inside a new
        expression, where a zeros placeholder is returned instead.

    Returns
    -------
    out: :ref:`NDArray`
        A :ref:`NDArray` is returned.

    Examples
    --------
    >>> import blosc2
    >>> import numpy as np
    >>> array = blosc2.eye(2, 3, dtype=np.int32)
    >>> print(array[:])
    [[1 0 0]
     [0 1 0]]
    """

    def fill_eye(inputs, output: np.ndarray, offset: tuple):
        # `output` is the block that starts at `offset` in the full array.
        # A global diagonal k (col - row == k) becomes, in block-local
        # coordinates, the diagonal k + row_offset - col_offset.
        out_k = offset[0] - offset[1] + inputs[0]
        output[:] = np.eye(*output.shape, k=out_k, dtype=output.dtype)

    if M is None:
        M = N
    shape = (N, M)
    # Normalize dtype (accepts strings, NumPy scalar types, etc.)
    dtype = np.dtype(dtype)

    if is_inside_new_expr():
        # We already have the dtype and shape, so return immediately
        return blosc2.zeros(shape, dtype=dtype)

    lazyarr = blosc2.lazyudf(fill_eye, (k,), dtype=dtype, shape=shape)
    return lazyarr.compute(**kwargs)
3251+
32033252
def fromiter(iterable, shape, dtype, c_order=True, **kwargs) -> NDArray:
32043253
"""Create a new array from an iterable object.
32053254

tests/ndarray/test_ndarray.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,18 @@ def test_linspace(ss, shape, dtype, chunks, blocks, endpoint, c_order):
189189
pass
190190

191191

192+
@pytest.mark.parametrize(("N", "M"), [(10, None), (10, 20), (20, 10)])
@pytest.mark.parametrize("k", [-1, 0, 1, 2, 3])
@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32])
@pytest.mark.parametrize("chunks", [(5, 6), (10, 9)])
def test_eye(k, N, M, dtype, chunks):
    """Check that blosc2.eye() agrees with np.eye() in shape, dtype and contents."""
    # NumPy provides the reference result
    expected = np.eye(N, M=M, k=k, dtype=dtype)
    result = blosc2.eye(N, M=M, k=k, dtype=dtype, chunks=chunks)

    assert expected.shape == result.shape
    assert expected.dtype == result.dtype
    np.testing.assert_allclose(expected, result[:])
202+
203+
192204
@pytest.mark.parametrize(
193205
("it", "shape", "dtype", "chunks", "blocks"),
194206
[

0 commit comments

Comments
 (0)