Skip to content

Commit 2dc5c33

Browse files
committed
Closes #5258: alignment tests for arkouda.numpy.pdarrayclass
1 parent f464e45 commit 2dc5c33

File tree

2 files changed

+376
-0
lines changed

2 files changed

+376
-0
lines changed

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ testpaths =
5252
tests/numpy/err_test.py
5353
tests/numpy/manipulation_functions_test.py
5454
tests/numpy/alignment_verification/operators_alignment.py
55+
tests/numpy/alignment_verification/pdarrayclass_alignment.py
5556
tests/numpy/numeric_test.py
5657
tests/numpy/numpy_test.py
5758
tests/numpy/pdarrayclass_test.py
Lines changed: 375 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,375 @@
1+
import operator
2+
3+
from typing import Callable, Tuple
4+
5+
import numpy as np
6+
import pytest
7+
8+
import arkouda as ak
9+
10+
11+
# -----------------------------
12+
# Helpers
13+
# -----------------------------
14+
def _np_dtype_for_kind(kind: str) -> np.dtype:
    """Translate a test "kind" label into the 64-bit NumPy dtype used for it.

    Parameters
    ----------
    kind : str
        One of "int", "uint", "float" or "bool".

    Returns
    -------
    np.dtype
        int64, uint64, float64 or bool_ respectively.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the four labels above.
    """
    kind_table = (
        ("int", np.int64),
        ("uint", np.uint64),
        ("float", np.float64),
        ("bool", np.bool_),
    )
    for label, scalar_type in kind_table:
        if kind == label:
            return np.dtype(scalar_type)
    raise ValueError(f"unknown kind={kind}")
24+
25+
26+
def _make_data(kind: str, n: int, seed: int = 0) -> Tuple[np.ndarray, "ak.pdarray"]:
    """Build a seeded random NumPy array and the Arkouda array made from it.

    Parameters
    ----------
    kind : str
        "int" (values in [-100, 100)), "uint" (values in [0, 200)),
        "float" (standard-normal draws) or "bool".
    n : int
        Number of elements to generate.
    seed : int
        Seed for ``np.random.default_rng``; the same seed gives the same data.

    Returns
    -------
    Tuple[np.ndarray, ak.pdarray]
        The NumPy array and ``ak.array`` of that same array.

    Raises
    ------
    ValueError
        If ``kind`` is not recognised (raised by the dtype lookup).
    """
    rng = np.random.default_rng(seed)
    dt = _np_dtype_for_kind(kind)

    if kind == "int":
        values = rng.integers(-100, 100, size=n, dtype=dt)
    elif kind == "uint":
        values = rng.integers(0, 200, size=n, dtype=dt)
    elif kind == "float":
        values = rng.normal(size=n).astype(dt)
        # Every tenth element becomes NaN so NaN semantics get exercised;
        # arrays shorter than 10 are left NaN-free.
        if n >= 10:
            values[::10] = np.nan
    elif kind == "bool":
        # Draw 0/1 as int8 first, then cast to the bool dtype.
        values = rng.integers(0, 2, size=n, dtype=np.int8).astype(dt)
    else:
        raise ValueError(f"unknown kind={kind}")

    return values, ak.array(values)
54+
55+
56+
def _assert_np_ak_same(a_np: np.ndarray, a_ak: "ak.pdarray") -> None:
    """Assert that an Arkouda result matches the NumPy reference result.

    The Arkouda array is pulled back with ``to_ndarray()`` and compared on
    shape, dtype *kind* and values.

    Parameters
    ----------
    a_np : np.ndarray
        Expected values computed by NumPy.
    a_ak : ak.pdarray
        Actual values computed by Arkouda.

    Raises
    ------
    AssertionError
        If shape, dtype kind or values differ.
    """
    actual = a_ak.to_ndarray()

    assert actual.shape == a_np.shape

    # Only the dtype kind is compared, not the exact dtype: widths may
    # legitimately differ (e.g. int32 vs int64).
    actual_kind = actual.dtype.kind
    assert actual_kind == a_np.dtype.kind

    if actual_kind != "f":
        np.testing.assert_array_equal(actual, a_np)
        return

    # Floats: tight relative tolerance, and NaNs in matching positions count as equal.
    np.testing.assert_allclose(actual, a_np, rtol=1e-12, atol=0.0, equal_nan=True)
70+
71+
72+
# -----------------------------
73+
# Binary operator alignment
74+
# -----------------------------
75+
# Parametrization table for the binary-operator test.
# Each row is (op_name, numpy_callable, python_operator_callable).
_BINARY_CASES = [
    # arithmetic
    ("add", np.add, operator.add),
    ("sub", np.subtract, operator.sub),
    ("mul", np.multiply, operator.mul),
    ("truediv", np.true_divide, operator.truediv),
    ("floordiv", np.floor_divide, operator.floordiv),
    ("mod", np.mod, operator.mod),
    ("pow", np.power, operator.pow),
    # bitwise / logical
    ("and", np.bitwise_and, operator.and_),
    ("or", np.bitwise_or, operator.or_),
    ("xor", np.bitwise_xor, operator.xor),
    # shifts
    ("lshift", np.left_shift, operator.lshift),
    ("rshift", np.right_shift, operator.rshift),
]
90+
91+
92+
def _binary_op_supported(kind: str, opname: str) -> bool:
    """Report whether the alignment tests exercise ``opname`` for ``kind``.

    Parameters
    ----------
    kind : str
        Data kind label ("int", "uint", "float" or "bool").
    opname : str
        Operator name, e.g. "add", "lshift", "xor".

    Returns
    -------
    bool
        True when the combination should be tested, False when it is skipped.
    """
    bool_ops = frozenset(
        {
            "add",  # +
            "mul",  # *
            "pow",  # **
            "and",  # &
            "or",  # |
            "xor",  # ^
            "eq",
            "ne",
            "lt",
            "le",
            "gt",
            "ge",
        }
    )
    integer_only_ops = frozenset({"lshift", "rshift", "and", "or", "xor"})

    # Booleans have their own allow-list; nothing below applies to them.
    if kind == "bool":
        return opname in bool_ops

    # Shifts and bitwise ops are limited to integer kinds (bool was handled above).
    if opname in integer_only_ops:
        return kind in {"int", "uint"}

    return True
116+
117+
118+
@pytest.mark.parametrize("kind", ["int", "uint", "float", "bool"])
@pytest.mark.parametrize("opname,np_op,py_op", _BINARY_CASES)
def test_pdarray_binary_ops_match_numpy(
    kind: str, opname: str, np_op: Callable, py_op: Callable
) -> None:
    """Check that binary operators on pdarrays give the same results as NumPy.

    Three operand layouts are compared for every supported (kind, operator)
    pair: pdarray OP pdarray, pdarray OP scalar and scalar OP pdarray.
    Unsupported pairs are skipped, and pairs recorded below as known
    Arkouda/NumPy differences are marked xfail before any data is built.
    """
    if not _binary_op_supported(kind, opname):
        pytest.skip(f"{opname} not supported for {kind}")

    # Known NumPy alignment gaps (intentional xfails), keyed by (kind, opname).
    known_gaps = {
        # NumPy floors toward -inf; Arkouda truncates toward 0.
        ("int", "floordiv"): (
            "Arkouda uses truncating division for signed ints; NumPy uses floor division"
        ),
        # NumPy remainder takes the divisor's sign; Arkouda follows trunc division.
        ("int", "mod"): (
            "Arkouda modulo follows truncating division; NumPy remainder follows floor-division rules"
        ),
        # NumPy shifts arithmetically (sign-propagating); Arkouda zero-fills.
        ("int", "rshift"): (
            "Arkouda right shift on signed ints is logical; NumPy uses arithmetic shift"
        ),
        # NumPy promotes bool ** bool to signed int; Arkouda returns unsigned.
        ("bool", "pow"): (
            "Arkouda bool ** bool returns unsigned dtype; NumPy promotes to signed int"
        ),
    }
    gap_reason = known_gaps.get((kind, opname))
    if gap_reason is not None:
        pytest.xfail(gap_reason)

    size = 101
    lhs_np, lhs_ak = _make_data(kind, size, seed=1)
    rhs_np, rhs_ak = _make_data(kind, size, seed=2)

    # Avoid division/mod by zero instability: sanitise the right-hand operand.
    if opname in {"truediv", "floordiv", "mod"} and kind in {"int", "uint", "float"}:
        divisor = rhs_np.copy()
        if kind == "float":
            divisor[np.isnan(divisor)] = 1.0
            divisor[divisor == 0.0] = 1.0
        else:
            divisor[divisor == 0] = 1
        rhs_np = divisor
        rhs_ak = ak.array(rhs_np)

    # Avoid huge pow overflow for ints: small bases, small non-negative exponents.
    if opname == "pow" and kind in {"int", "uint"}:
        lhs_np = (lhs_np % 10).astype(lhs_np.dtype)
        rhs_np = (np.abs(rhs_np) % 5).astype(rhs_np.dtype)
        lhs_ak = ak.array(lhs_np)
        rhs_ak = ak.array(rhs_np)

    # Scalar operand: shifts need a small non-negative count, floats use a
    # non-integral value, everything else uses 3.
    if opname in {"lshift", "rshift"}:
        scalar = 2
    elif kind == "float":
        scalar = 3.5
    else:
        scalar = 3

    operand_layouts = (
        ((lhs_ak, rhs_ak), (lhs_np, rhs_np)),  # pdarray OP pdarray
        ((lhs_ak, scalar), (lhs_np, scalar)),  # pdarray OP scalar
        ((scalar, lhs_ak), (scalar, lhs_np)),  # scalar OP pdarray (reverse op)
    )
    for ak_operands, np_operands in operand_layouts:
        result_ak = py_op(*ak_operands)
        expected_np = np_op(*np_operands)
        _assert_np_ak_same(expected_np, result_ak)
200+
201+
202+
# -----------------------------
203+
# Comparisons
204+
# -----------------------------
205+
# Parametrization table for the comparison test.
# Each row is (op_name, numpy_callable, python_operator_callable); op_name is
# also the attribute name of the matching function in the ``operator`` module.
_COMPARE_CASES = [
    (opname, getattr(np, ufunc_name), getattr(operator, opname))
    for opname, ufunc_name in (
        ("lt", "less"),
        ("le", "less_equal"),
        ("gt", "greater"),
        ("ge", "greater_equal"),
        ("eq", "equal"),
        ("ne", "not_equal"),
    )
]
213+
214+
215+
@pytest.mark.parametrize("kind", ["int", "uint", "float", "bool"])
@pytest.mark.parametrize("opname,np_op,py_op", _COMPARE_CASES)
def test_pdarray_comparisons_match_numpy(
    kind: str, opname: str, np_op: Callable, py_op: Callable
) -> None:
    """Check that elementwise pdarray comparisons match NumPy and yield bools."""
    size = 97
    lhs_np, lhs_ak = _make_data(kind, size, seed=11)
    rhs_np, rhs_ak = _make_data(kind, size, seed=12)

    result_ak = py_op(lhs_ak, rhs_ak)
    expected_np = np_op(lhs_np, rhs_np)

    # Comparisons should produce bool arrays.
    assert result_ak.dtype == ak.bool_
    np.testing.assert_array_equal(result_ak.to_ndarray(), expected_np)
230+
231+
232+
# -----------------------------
233+
# Unary ops
234+
# -----------------------------
235+
# Parametrization table for the unary-operator test.
# Each row is (op_name, numpy_callable, python_operator_callable); op_name is
# also the attribute name of the matching function in the ``operator`` module.
_UNARY_CASES = [
    (opname, getattr(np, ufunc_name), getattr(operator, opname))
    for opname, ufunc_name in (
        ("neg", "negative"),
        ("pos", "positive"),
        ("invert", "invert"),
    )
]
240+
241+
242+
def _unary_supported(kind: str, opname: str) -> bool:
    """Report whether the alignment tests exercise unary ``opname`` for ``kind``.

    Only "invert" is restricted: it is tested for "int", "uint" and "bool"
    data. Every other operator name is accepted for every kind.
    """
    return opname != "invert" or kind in {"int", "uint", "bool"}
246+
247+
248+
@pytest.mark.parametrize("kind", ["int", "uint", "float", "bool"])
@pytest.mark.parametrize("opname,np_op,py_op", _UNARY_CASES)
def test_pdarray_unary_ops_match_numpy(kind: str, opname: str, np_op: Callable, py_op: Callable) -> None:
    """Check that unary operators on pdarrays behave like their NumPy ufuncs.

    When NumPy rejects the operator for this dtype with a ``TypeError``,
    Arkouda is required to raise ``TypeError`` as well. Otherwise the two
    results must agree in shape, dtype kind and values.
    """
    if not _unary_supported(kind, opname):
        pytest.skip(f"{opname} not supported for {kind}")

    a_np, a_ak = _make_data(kind, 123, seed=21)

    # Evaluate the NumPy reference once; it is both the "does NumPy raise?"
    # probe and the expected value for the comparison below.
    try:
        expected_np = np_op(a_np)
    except TypeError:
        # If NumPy raises for this unary op/dtype, Arkouda should also raise.
        with pytest.raises(TypeError):
            py_op(a_ak)
        return

    got_ak = py_op(a_ak)
    _assert_np_ak_same(expected_np, got_ak)
267+
268+
269+
# -----------------------------
270+
# Indexing / slicing alignment
271+
# -----------------------------
272+
@pytest.mark.parametrize("kind", ["int", "float", "bool"])
def test_pdarray_basic_slicing_matches_numpy(kind: str) -> None:
    """Check that basic slices of a pdarray select the same elements as NumPy."""
    a_np, a_ak = _make_data(kind, 200, seed=31)

    # np.s_ builds plain slice objects from ordinary slicing syntax.
    slice_cases = (
        np.s_[:],  # full copy
        np.s_[0:10],  # leading prefix
        np.s_[5:50:3],  # positive step
        np.s_[-50:],  # negative start
        np.s_[::-1],  # full reversal
        np.s_[150:20:-7],  # negative step with explicit bounds
    )

    for selector in slice_cases:
        sliced_ak = a_ak[selector]
        sliced_np = a_np[selector]
        _assert_np_ak_same(sliced_np, sliced_ak)
289+
290+
291+
@pytest.mark.parametrize("kind", ["int", "float"])
def test_pdarray_boolean_mask_indexing_matches_numpy(kind: str) -> None:
    """Check that indexing a pdarray with a boolean mask matches NumPy."""
    length = 120
    values_np, values_ak = _make_data(kind, length, seed=41)
    # The mask is random boolean data of the same length, from its own seed.
    mask_np, mask_ak = _make_data("bool", length, seed=42)

    selected_ak = values_ak[mask_ak]
    selected_np = values_np[mask_np]
    _assert_np_ak_same(selected_np, selected_ak)
299+
300+
301+
# -----------------------------
302+
# Reshape / flatten / take
303+
# -----------------------------
304+
@pytest.mark.skip_if_rank_not_compiled([2])
@pytest.mark.parametrize("kind", ["int", "float", "bool"])
def test_pdarray_reshape_and_flatten_match_numpy(kind: str) -> None:
    """Check that reshaping to 2-D and flattening back both match NumPy."""
    flat_np, flat_ak = _make_data(kind, 240, seed=51)

    # 240 elements reshape exactly into 16 x 15.
    target_shape = (16, 15)
    grid_np = flat_np.reshape(target_shape)
    grid_ak = flat_ak.reshape(target_shape)
    _assert_np_ak_same(grid_np, grid_ak)

    roundtrip_np = grid_np.flatten()
    roundtrip_ak = grid_ak.flatten()
    _assert_np_ak_same(roundtrip_np, roundtrip_ak)
316+
317+
318+
@pytest.mark.parametrize("kind", ["int", "float"])
def test_pdarray_take_matches_numpy(kind: str) -> None:
    """Check that ``pdarray.take`` with an index array matches ``np.take``."""
    values_np, values_ak = _make_data(kind, 100, seed=61)

    # Indices include a repeat (3, 3) plus the first and last valid positions.
    positions_np = np.array([0, 3, 3, 9, 50, 99], dtype=np.int64)
    positions_ak = ak.array(positions_np)

    taken_ak = values_ak.take(positions_ak)
    taken_np = np.take(values_np, positions_np)

    _assert_np_ak_same(taken_np, taken_ak)
329+
330+
331+
# -----------------------------
332+
# Misc "array contract" behaviors
333+
# -----------------------------
334+
def test_pdarray_len_matches_numpy() -> None:
    """Check that ``len()`` of a pdarray equals ``len()`` of its NumPy source."""
    values_np, values_ak = _make_data("int", 37, seed=71)
    ak_length = len(values_ak)
    np_length = len(values_np)
    assert ak_length == np_length
337+
338+
339+
def test_pdarray_bool_raises_like_numpy_for_non_scalar() -> None:
    """Check that ``bool()`` of a multi-element pdarray raises like NumPy does.

    NumPy: ``bool(np.array([1, 2]))`` raises ValueError (ambiguous truth value).
    """
    values_np = np.array([1, 2], dtype=np.int64)
    values_ak = ak.array(values_np)

    # NumPy first (confirms the reference behaviour), then Arkouda.
    for multi_element in (values_np, values_ak):
        with pytest.raises(ValueError):
            bool(multi_element)
349+
350+
351+
@pytest.mark.parametrize("kind", ["int", "float", "bool"])
def test_pdarray_equals_matches_numpy_array_equal(kind: str) -> None:
    """Check that ``pdarray.equals`` agrees with ``np.array_equal``.

    Two comparisons are made: against an unmodified copy, and against a copy
    whose first element has been changed.
    """
    base_np, base_ak = _make_data(kind, 55, seed=81)

    copy_np = base_np.copy()
    copy_ak = ak.array(copy_np)
    assert base_ak.equals(copy_ak) == np.array_equal(base_np, copy_np)

    # Change element 0 in a way that suits the dtype.
    changed_np = copy_np.copy()
    if kind == "float":
        changed_np[0] = 123.0
    elif kind == "bool":
        changed_np[0] = ~changed_np[0]
    else:
        changed_np[0] = changed_np[0] + 1
    changed_ak = ak.array(changed_np)

    assert base_ak.equals(changed_ak) == np.array_equal(base_np, changed_np)
368+
369+
370+
def test_helpers_raise_on_unknown_kind() -> None:
    """Check that both data helpers reject an unrecognised kind label."""
    failing_calls = (
        lambda: _np_dtype_for_kind("nope"),
        lambda: _make_data("nope", 10, seed=0),
    )
    for call in failing_calls:
        with pytest.raises(ValueError, match=r"unknown kind="):
            call()

0 commit comments

Comments
 (0)