Commit f9e97aa

Various final fixes
1 parent 09d8f29 commit f9e97aa

File tree: 5 files changed (+110 lines, -57 lines)

src/blosc2/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -350,6 +350,7 @@ def __array_namespace_info__() -> Info:
     frombuffer,
     fromiter,
     get_slice_nchunks,
+    meshgrid,
     nans,
     uninit,
     zeros,
@@ -578,6 +579,7 @@ def __array_namespace_info__() -> Info:
     "matrix_transpose",
     "max",
     "mean",
+    "meshgrid",
     "min",
     "nans",
     "ndarray_from_cframe",

src/blosc2/blosc2_ext.pyx

Lines changed: 2 additions & 2 deletions
@@ -2612,8 +2612,8 @@ cdef class NDArray:
             mask_[i] = mask[i]
         _check_rc(b2nd_squeeze_index(self.array, mask_), "Error while squeezing array")
 
-        if self.array.shape[0] == 1 and self.ndim == 1:
-            self.array.ndim = 0
+        #if self.array.shape[0] == 1 and self.ndim == 1:
+        #    self.array.ndim = 0
 
     def as_ffi_ptr(self):
         return PyCapsule_New(self.array, <char *> "b2nd_array_t*", NULL)

src/blosc2/lazyexpr.py

Lines changed: 8 additions & 3 deletions
@@ -1671,11 +1671,16 @@ def slices_eval_getitem(
 
 
 def infer_reduction_dtype(dtype, operation):
-    # It may change in the future, but for now, this mimics NumPy's (2.1) behavior pretty well
+    # It may change in the future, but mostly array-api compliant
+    my_float = np.result_type(
+        dtype, np.float32 if dtype == np.float32 or dtype == np.complex64 else blosc2.DEFAULT_FLOAT
+    )
     if operation in {ReduceOp.SUM, ReduceOp.PROD}:
-        return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else np.float64)
+        if np.issubdtype(dtype, np.unsignedinteger):
+            return np.result_type(dtype, np.uint64)
+        return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else my_float)
     elif operation in {ReduceOp.MEAN, ReduceOp.STD, ReduceOp.VAR}:
-        return np.float64
+        return my_float
     elif operation in {ReduceOp.MIN, ReduceOp.MAX}:
         return dtype
     elif operation in {ReduceOp.ANY, ReduceOp.ALL}:
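
Note on the new reduction dtypes: a minimal stand-alone sketch (plain NumPy, not part of the commit) of the promotion rules the patched infer_reduction_dtype() applies. The names DEFAULT_FLOAT and reduction_dtype_sketch are stand-ins introduced here for illustration; DEFAULT_FLOAT is assumed to play the role of blosc2.DEFAULT_FLOAT (presumably float64).

    import numpy as np

    DEFAULT_FLOAT = np.float64  # assumption: stand-in for blosc2.DEFAULT_FLOAT

    def reduction_dtype_sketch(dtype, op):
        # Mirror the promotion rules of the patched infer_reduction_dtype()
        dtype = np.dtype(dtype)
        my_float = np.result_type(
            dtype, np.float32 if dtype in (np.float32, np.complex64) else DEFAULT_FLOAT
        )
        if op in {"sum", "prod"}:
            if np.issubdtype(dtype, np.unsignedinteger):
                return np.result_type(dtype, np.uint64)
            return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else my_float)
        if op in {"mean", "std", "var"}:
            return my_float
        return dtype  # min/max keep the input dtype

    print(reduction_dtype_sketch(np.uint8, "sum"))       # uint64 (was int64 before)
    print(reduction_dtype_sketch(np.float32, "mean"))    # float32 (was always float64 before)
    print(reduction_dtype_sketch(np.complex64, "mean"))  # complex64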

src/blosc2/ndarray.py

Lines changed: 86 additions & 43 deletions
@@ -986,6 +986,11 @@ def __index__(self) -> bool:
         )
         return self.__int__()
 
+    def __complex__(self) -> complex:
+        if math.prod(self.shape) != 1:
+            raise ValueError(f"Cannot convert array of shape {self.shape} to complex float.")
+        return complex(self[()])
+
     @is_documented_by(sum)
     def sum(self, axis=None, dtype=None, keepdims=False, **kwargs):
         expr = blosc2.LazyExpr(new_op=(self, None, None))
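
A hedged usage sketch of the new __complex__ hook (not part of the commit): it assumes blosc2.asarray accepts a complex NumPy input, as the asarray docstring examples suggest, and that single-element extraction via self[()] converts cleanly.

    import numpy as np
    import blosc2

    a = blosc2.asarray(np.array([3.0 + 4.0j]))  # single-element array
    print(complex(a))                           # (3+4j)

    b = blosc2.asarray(np.zeros((2, 2)))
    # complex(b) raises ValueError: only arrays with exactly one element convert
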
@@ -3701,7 +3706,7 @@ def arange_fill(inputs, output, offset):
     start, _, step = inputs
     start += offset[0] * step
     stop = start + lout * step
-    if (stop - start) // step == lout:  # USE ARANGE IF POSSIBLE (2X FASTER)
+    if math.ceil((stop - start) / step) == lout:  # USE ARANGE IF POSSIBLE (2X FASTER)
         output[:] = np.arange(start, stop, step, dtype=output.dtype)
     else:  # use linspace to have finer control over exclusion of endpoint for float types
         output[:] = np.linspace(start, stop, lout, endpoint=False, dtype=output.dtype)
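
A quick arithmetic check (not part of the commit) of why the guard uses math.ceil: np.arange(start, stop, step) yields ceil((stop - start) / step) elements, which floor division can undercount for non-integer steps.

    import math
    import numpy as np

    start, stop, step = 0.0, 1.0, 0.3
    print(len(np.arange(start, stop, step)))   # 4 -> [0.0, 0.3, 0.6, 0.9]
    print(math.ceil((stop - start) / step))    # 4, matches, so the arange fast path applies
    print(int((stop - start) // step))         # 3, the old check would skip the fast path
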
@@ -4202,23 +4207,29 @@ def save(array: NDArray, urlpath: str, contiguous=True, **kwargs: Any) -> None:
     array.save(urlpath, contiguous, **kwargs)
 
 
-def asarray(array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy=True, **kwargs: Any) -> NDArray:
+def asarray(  # noqa : C901
+    array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy: bool | None = None, **kwargs: Any
+) -> NDArray:
     """Convert the `array` to an `NDArray`.
 
     Parameters
     ----------
     array: array_like
         An array supporting numpy array interface.
 
-    Other Parameters
-    ----------------
+    copy: bool | None, optional
+        Whether or not to copy the input. If True, the function copies.
+        If False, raise a ValueError when a copy is necessary. If None and the
+        input is an NDArray, avoid the copy by returning a lazy expression.
+        Default: None.
+
     kwargs: dict, optional
         Keyword arguments that are supported by the :func:`empty` constructor.
 
     Returns
     -------
-    out: :ref:`NDArray`
-        A new NDArray made of :paramref:`array`.
+    out: :ref:`NDArray` or :ref:`LazyExpr`
+        A new NDArray or LazyExpr made of :paramref:`array`.
 
     Notes
     -----
@@ -4237,8 +4248,7 @@ def asarray(array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy=True,
     >>> # Create a NDArray from a NumPy array
     >>> nda = blosc2.asarray(a)
     """
-    if not copy:
-        raise ValueError("asarray which avoids copy not implemented yet.")
+
     # Convert scalars to numpy array
     casting = kwargs.pop("casting", "unsafe")
     if casting != "unsafe":
@@ -4256,41 +4266,49 @@ def asarray(array: Sequence | np.ndarray | blosc2.C2Array | NDArray, copy=True,
         blocks = array.blocks
     chunks, blocks = compute_chunks_blocks(array.shape, chunks, blocks, array.dtype, **kwargs)
 
-    # Fast path for small arrays. This is not too expensive in terms of memory consumption.
-    shape = array.shape
-    small_size = 2**24  # 16 MB
-    array_nbytes = math.prod(shape) * array.dtype.itemsize
-    if array_nbytes < small_size:
-        if not isinstance(array, np.ndarray) and hasattr(array, "chunks"):
-            # A getitem operation should be enough to get a numpy array
-            array = array[()]
-
-        array = np.require(array, dtype=dtype, requirements="C")  # require contiguous array
-
-        return blosc2_ext.asarray(array, chunks, blocks, **kwargs)
-
-    # Create the empty array
-    ndarr = empty(shape, array.dtype, chunks=chunks, blocks=blocks, **kwargs)
-    behaved = are_partitions_behaved(shape, chunks, blocks)
-
-    # Get the coordinates of the chunks
-    chunks_idx, nchunks = get_chunks_idx(shape, chunks)
-
-    # Iterate over the chunks and update the empty array
-    for nchunk in range(nchunks):
-        # Compute current slice coordinates
-        coords = tuple(np.unravel_index(nchunk, chunks_idx))
-        slice_ = tuple(
-            slice(c * s, builtins.min((c + 1) * s, shape[i]))
-            for i, (c, s) in enumerate(zip(coords, chunks, strict=True))
-        )
-        # Ensure the array slice is contiguous and of correct dtype
-        array_slice = np.require(array[slice_], dtype=dtype, requirements="C")
-        if behaved:
-            # The whole chunk is to be updated, so this fastpath is safe
-            ndarr.schunk.update_data(nchunk, array_slice, copy=False)
-        else:
-            ndarr[slice_] = array_slice
+    copy = True if copy is None and not isinstance(array, NDArray) else copy
+    if copy:
+        # Fast path for small arrays. This is not too expensive in terms of memory consumption.
+        shape = array.shape
+        small_size = 2**24  # 16 MB
+        array_nbytes = math.prod(shape) * array.dtype.itemsize
+        if array_nbytes < small_size:
+            if not isinstance(array, np.ndarray) and hasattr(array, "chunks"):
+                # A getitem operation should be enough to get a numpy array
+                array = array[()]
+
+            array = np.require(array, dtype=dtype, requirements="C")  # require contiguous array
+
+            return blosc2_ext.asarray(array, chunks, blocks, **kwargs)
+
+        # Create the empty array
+        ndarr = empty(shape, array.dtype, chunks=chunks, blocks=blocks, **kwargs)
+        behaved = are_partitions_behaved(shape, chunks, blocks)
+
+        # Get the coordinates of the chunks
+        chunks_idx, nchunks = get_chunks_idx(shape, chunks)
+
+        # Iterate over the chunks and update the empty array
+        for nchunk in range(nchunks):
+            # Compute current slice coordinates
+            coords = tuple(np.unravel_index(nchunk, chunks_idx))
+            slice_ = tuple(
+                slice(c * s, builtins.min((c + 1) * s, shape[i]))
+                for i, (c, s) in enumerate(zip(coords, chunks, strict=True))
+            )
+            # Ensure the array slice is contiguous and of correct dtype
+            array_slice = np.require(array[slice_], dtype=dtype, requirements="C")
+            if behaved:
+                # The whole chunk is to be updated, so this fastpath is safe
+                ndarr.schunk.update_data(nchunk, array_slice, copy=False)
+            else:
+                ndarr[slice_] = array_slice
+    else:
+        if not isinstance(array, NDArray):
+            raise ValueError("Must always do a copy for asarray unless NDArray provided.")
+        mask = [True] + [False for i in range(array.ndim)]
+        # TODO: make a direct view possible
+        return blosc2.expand_dims(array, axis=0).squeeze(mask)  # way to get a view
 
     return ndarr
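
A short usage sketch (not part of the commit) of the copy semantics introduced above, following the updated docstring; the local names a_np, b, c, d are only illustrative.

    import numpy as np
    import blosc2

    a_np = np.arange(10, dtype=np.int64)

    b = blosc2.asarray(a_np)            # copy=None on non-NDArray input -> behaves like copy=True
    c = blosc2.asarray(b, copy=False)   # NDArray input -> no copy, expand_dims/squeeze "view"
    d = blosc2.asarray(b)               # copy=None on an NDArray -> also takes the no-copy path
    # blosc2.asarray(a_np, copy=False)  # ValueError: a copy is required for non-NDArray input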

@@ -5168,3 +5186,28 @@ def broadcast_to(arr, shape):
         A new array with the given shape.
     """
     return (arr + blosc2.zeros(shape, dtype=arr.dtype)).compute()  # return lazyexpr quickly
+
+
+def meshgrid(arrays: NDArray, indexing: str = "xy") -> Sequence[NDArray]:
+    """
+    Returns coordinate matrices from coordinate vectors.
+
+    Parameters
+    ----------
+    arrays: NDArray
+        An arbitrary number of one-dimensional arrays representing grid coordinates. Each array should have the same numeric data type.
+
+    indexing: str
+        Cartesian 'xy' or matrix 'ij' indexing of output. If zero or one one-dimensional vector(s) are provided, the indexing keyword is ignored.
+        Default: 'xy'.
+
+    Returns
+    -------
+    out: list[NDArray]
+        List of N arrays, where N is the number of provided one-dimensional input arrays, all with the same dtype.
+        For N one-dimensional arrays having lengths Ni = len(xi),
+
+        * if matrix indexing 'ij', then each returned array has shape (N1, N2, N3, ..., Nn).
+        * if Cartesian indexing 'xy', then each returned array has shape (N2, N1, N3, ..., Nn).
+    """
+    raise NotImplementedError("Working on meshgrid")
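
Since blosc2.meshgrid() is still a stub (it raises NotImplementedError), here is a small NumPy sketch, outside the commit, of the shapes the docstring promises:

    import numpy as np

    x = np.arange(3)   # N1 = 3
    y = np.arange(4)   # N2 = 4

    gx, gy = np.meshgrid(x, y, indexing="xy")
    print(gx.shape, gy.shape)   # (4, 3) (4, 3) -> (N2, N1) for Cartesian 'xy'

    gi, gj = np.meshgrid(x, y, indexing="ij")
    print(gi.shape, gj.shape)   # (3, 4) (3, 4) -> (N1, N2) for matrix 'ij'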

tests/ndarray/test_squeeze.py

Lines changed: 12 additions & 9 deletions
@@ -6,24 +6,27 @@
 # LICENSE file in the root directory of this source tree)
 #######################################################################
 
+import numpy as np
 import pytest
 
 import blosc2
 
 
 @pytest.mark.parametrize(
-    ("shape", "chunks", "blocks", "fill_value"),
+    ("shape", "chunks", "blocks", "fill_value", "mask"),
     [
-        ((1, 1230), (1, 100), (1, 3), b"0123"),
-        ((23, 1, 1, 34), (20, 1, 1, 20), None, 1234),
-        ((80, 1, 51, 60, 1), None, (6, 1, 6, 26, 1), 3.333),
-        ((1, 1, 1), None, None, True),
+        ((1, 1230), (1, 100), (1, 3), b"0123", [True, False]),
+        ((23, 1, 1, 34), (20, 1, 1, 20), None, 1234, [False, False, True, False]),
+        ((80, 1, 51, 60, 1), None, (6, 1, 6, 26, 1), 3.333, [False] * 4 + [True]),
+        ((1, 1, 1), None, None, True, [False, True, True]),
     ],
 )
-def test_squeeze(shape, chunks, blocks, fill_value):
+def test_squeeze(shape, chunks, blocks, fill_value, mask):
     a = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks)
 
-    a.squeeze()
-    b = a[...]
+    b = np.squeeze(a[...], tuple(i for i, m in enumerate(mask) if m))
+    a_ = a.squeeze(mask)
 
-    assert a.shape == b.shape
+    assert a_.shape == b.shape
+    # TODO: this would work if squeeze returns a view
+    # assert a_.shape != a.shape
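
For reference, a hedged reading (not part of the commit) of the mask semantics this test now exercises: axes flagged True (all of length 1) are removed, mirroring np.squeeze with an explicit axis tuple.

    import numpy as np
    import blosc2

    a = blosc2.full((23, 1, 1, 34), fill_value=1234)
    mask = [False, False, True, False]         # drop only axis 2

    a_ = a.squeeze(mask)
    b = np.squeeze(a[...], tuple(i for i, m in enumerate(mask) if m))
    assert a_.shape == b.shape == (23, 1, 34)  # whether a view is returned is still a TODO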
