Skip to content

Commit 1159e65

Browse files
committed
Added integration tests for Arro3, comparable to PyArrow tests
1 parent 6455640 commit 1159e65

File tree

2 files changed

+278
-0
lines changed

2 files changed

+278
-0
lines changed

Tests/test_arro3.py

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from typing import Any, NamedTuple
5+
from itertools import repeat, chain
6+
7+
import pytest
8+
9+
from PIL import Image
10+
11+
from .helper import (
12+
assert_deep_equal,
13+
assert_image_equal,
14+
hopper,
15+
is_big_endian,
16+
)
17+
18+
TYPE_CHECKING = False
19+
if TYPE_CHECKING:
20+
from arro3.core import Array, DataType, Field, fixed_size_list_array
21+
from arro3 import compute
22+
else:
23+
arro3 = pytest.importorskip("arro3", reason="Arro3 not installed")
24+
from arro3.core import Array, DataType, Field, fixed_size_list_array
25+
from arro3 import compute
26+
27+
# Size passed to Image.fromarrow() for the images that the fromarray tests
# build from constant-filled arrays; the two entries are multiplied together
# to get the pixel count.
TEST_IMAGE_SIZE = (10, 10)
28+
29+
30+
def _test_img_equals_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None, elts_per_pixel: int = 1
) -> None:
    """Assert that a sample of pixels in ``img`` matches the contents of ``arr``.

    :param img: Image whose pixels are read through ``img.load()``.
    :param arr: Array-like object supporting ``len()`` and integer indexing,
        whose items expose ``as_py()``. It must hold exactly
        ``img.width * img.height * elts_per_pixel`` elements.
    :param mask: For each band of the image pixel, the index of the matching
        element on the array side. If falsy and ``elts_per_pixel`` is 1, the
        whole pixel is compared with ``assert_deep_equal``.
    :param elts_per_pixel: Number of consecutive array elements that make up
        one pixel (1 when each array element is a whole pixel).
    :raises AssertionError: If the length or any sampled pixel does not match.
    """
    assert img.height * img.width * elts_per_pixel == len(arr)
    px = img.load()
    assert px is not None
    if elts_per_pixel > 1 and mask is None:
        # have to do element-wise comparison when we're comparing
        # flattened r,g,b,a to a pixel.
        mask = list(range(elts_per_pixel))

    # Sample about ten positions along each axis. The stride is clamped to 1
    # because a dimension smaller than 10 would otherwise give a step of 0,
    # which range() rejects with ValueError.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)

    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            # Row-major index of this pixel in a one-element-per-pixel array.
            offset = y * img.width + x
            if mask:
                pixel = px[x, y]
                assert isinstance(pixel, tuple)
                for ix, elt in enumerate(mask):
                    if elts_per_pixel == 1:
                        # One array element per pixel: index into its list.
                        assert pixel[ix] == arr[offset].as_py()[elt]
                    else:
                        # Flattened layout: each band is its own array element.
                        assert pixel[ix] == arr[offset * elts_per_pixel + elt].as_py()
            else:
                assert_deep_equal(px[x, y], arr[offset].as_py())
55+
56+
57+
def _test_img_equals_int32_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None, elts_per_pixel: int = 1
) -> None:
    """Assert that sampled pixels of ``img`` match 32-bit packed values in ``arr``.

    Each array element is split into four bytes, least-significant byte
    first; the byte order is reversed when ``is_big_endian()`` is true.

    :param img: Image whose pixels are read through ``img.load()``.
    :param arr: Array-like object supporting ``len()`` and integer indexing,
        whose items expose ``as_py()`` returning an integer. It must hold
        ``img.width * img.height * elts_per_pixel`` elements.
    :param mask: For each band of the image pixel, the index of the matching
        unpacked byte. If ``None``, band ``i`` is compared with byte ``i`` for
        every band of the pixel, so pass an explicit mask for modes whose
        bands are not stored in the first bytes.
    :param elts_per_pixel: Array elements per pixel; only used for the length
        check.
    :raises AssertionError: If the length or any sampled band does not match.
    """
    assert img.height * img.width * elts_per_pixel == len(arr)
    px = img.load()
    assert px is not None

    # Clamp the sampling stride to 1: a dimension smaller than 10 would
    # otherwise give range() a step of 0, which raises ValueError.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)
    big_endian = is_big_endian()

    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            pixel = px[x, y]
            assert isinstance(pixel, tuple)
            packed = arr[y * img.width + x].as_py()
            # Shift-and-mask keeps every byte in 0..255, including the top
            # byte of a negative (signed) value.
            unpacked = tuple((packed >> shift) & 0xFF for shift in (0, 8, 16, 24))
            if big_endian:
                unpacked = unpacked[::-1]

            # have to do element-wise comparison when we're comparing
            # flattened rgba in an uint32 to a pixel: without a mask, check
            # every band of the pixel rather than only the first one.
            bands = mask if mask is not None else range(len(pixel))
            for ix, elt in enumerate(bands):
                assert pixel[ix] == unpacked[elt]
83+
84+
# Arrow list type with a list size of 4 whose child field "_" is a
# non-nullable uint8. The tests below expect this type for every multi-band
# mode (one 4-byte list per pixel).
fl_uint8_4_type = DataType.list(Field("_", DataType.uint8()).with_nullable(False), 4)
85+
86+
@pytest.mark.parametrize(
    "mode, dtype, mask",
    [
        ("L", DataType.uint8(), None),
        ("I", DataType.int32(), None),
        ("F", DataType.float32(), None),
        ("LA", fl_uint8_4_type, [0, 3]),
        ("RGB", fl_uint8_4_type, [0, 1, 2]),
        ("RGBA", fl_uint8_4_type, None),
        ("RGBX", fl_uint8_4_type, None),
        ("CMYK", fl_uint8_4_type, None),
        ("YCbCr", fl_uint8_4_type, [0, 1, 2]),
        ("HSV", fl_uint8_4_type, [0, 1, 2]),
    ],
)
def test_to_array(mode: str, dtype: DataType, mask: list[int] | None) -> None:
    """Export an image to an arro3 Array and load it back unchanged.

    Checks the array contents against the image, the array's Arrow type
    against ``dtype``, and that ``Image.fromarrow`` reproduces the image.
    """
    # Crop the test image to a non-square region.
    cropped = hopper(mode).crop((3, 0, 124, 127))
    assert cropped.size == (121, 127)

    exported = Array(cropped)  # type: ignore[call-overload]
    _test_img_equals_pyarray(cropped, exported, mask)
    assert exported.type == dtype

    round_tripped = Image.fromarrow(exported, mode, cropped.size)
    assert round_tripped
    assert_image_equal(cropped, round_tripped)
117+
118+
119+
def test_lifetime() -> None:
    """Arrays made from an image must stay accessible after it is deleted."""
    # valgrind shouldn't error out here.
    source = hopper("L")

    first = Array(source)  # type: ignore[call-overload]
    second = Array(source)  # type: ignore[call-overload]

    # Delete the image before touching either array.
    del source

    # Read each array in turn, dropping it as soon as it has been summed.
    assert compute.sum(first).as_py() > 0
    del first

    assert compute.sum(second).as_py() > 0
    del second
135+
136+
137+
def test_lifetime2() -> None:
    """The image must remain usable after its arrays have been deleted."""
    # valgrind shouldn't error out here.
    source = hopper("L")

    first = Array(source)  # type: ignore[call-overload]
    second = Array(source)  # type: ignore[call-overload]

    # Read and delete both arrays while the image is still referenced.
    assert compute.sum(first).as_py() > 0
    del first

    assert compute.sum(second).as_py() > 0
    del second

    # The image is still readable: copy it and inspect the first pixel.
    duplicate = source.copy()
    access = duplicate.load()
    assert access  # make mypy happy
    assert isinstance(access[0, 0], int)
156+
157+
158+
class DataShape(NamedTuple):
    """Describe the array a test builds before handing it to Image.fromarrow."""

    # Arrow data type of the array.
    dtype: DataType

    # Value repeated to fill the array. Strictly speaking this is a pixel or
    # a pixel component (list[uint8][4], float, int, uint32, uint8, ...);
    # more correctly, it should be a value of exactly ``dtype``.
    elt: Any

    # Number of array elements that together make up one pixel.
    elts_per_pixel: int
165+
166+
167+
# One list of four uint8 values per pixel, so a single array element per pixel.
UINT_ARR = DataShape(dtype=fl_uint8_4_type, elt=[1, 2, 3, 4], elts_per_pixel=1)

# A flat uint8 array: one byte value, repeated four times for every pixel.
UINT = DataShape(dtype=DataType.uint8(), elt=3, elts_per_pixel=4)

# One packed integer per pixel. The value is above 0x80000000, so it does
# not fit in a signed 32-bit integer.
UINT32 = DataShape(dtype=DataType.uint32(), elt=0xABCDEF45, elts_per_pixel=1)

# One packed integer per pixel, with a value that also fits in a signed
# 32-bit integer.
# NOTE(review): the dtype is uint32 despite the INT32 name - confirm that
# this is intentional before changing it.
INT32 = DataShape(dtype=DataType.uint32(), elt=0x12CDEF45, elts_per_pixel=1)
190+
191+
192+
@pytest.mark.parametrize(
    "mode, data_tp, mask",
    [
        ("L", DataShape(DataType.uint8(), 3, 1), None),
        ("I", DataShape(DataType.int32(), 1 << 24, 1), None),
        ("F", DataShape(DataType.float32(), 3.14159, 1), None),
        ("LA", UINT_ARR, [0, 3]),
        ("LA", UINT, [0, 3]),
        ("RGB", UINT_ARR, [0, 1, 2]),
        ("RGBA", UINT_ARR, None),
        ("CMYK", UINT_ARR, None),
        ("YCbCr", UINT_ARR, [0, 1, 2]),
        ("HSV", UINT_ARR, [0, 1, 2]),
        ("RGB", UINT, [0, 1, 2]),
        ("RGBA", UINT, None),
        ("CMYK", UINT, None),
        ("YCbCr", UINT, [0, 1, 2]),
        ("HSV", UINT, [0, 1, 2]),
    ],
)
def test_fromarray(mode: str, data_tp: DataShape, mask: list[int] | None) -> None:
    """Build an image from a constant-filled arro3 array and compare pixels."""
    n_elements = TEST_IMAGE_SIZE[0] * TEST_IMAGE_SIZE[1] * data_tp.elts_per_pixel

    if data_tp.dtype == fl_uint8_4_type:
        # Here elt is a list, so repeating it yields a flat run of uint8
        # values, which is then regrouped into lists of four.
        flat = Array(data_tp.elt * n_elements, type=DataType.uint8())
        arr = fixed_size_list_array(flat, 4)
    else:
        arr = Array([data_tp.elt] * n_elements, type=data_tp.dtype)

    img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)

    _test_img_equals_pyarray(img, arr, mask, data_tp.elts_per_pixel)
224+
225+
226+
@pytest.mark.parametrize(
    "mode, data_tp, mask",
    [
        ("LA", UINT32, [0, 3]),
        ("RGB", UINT32, [0, 1, 2]),
        ("RGBA", UINT32, None),
        ("CMYK", UINT32, None),
        ("YCbCr", UINT32, [0, 1, 2]),
        ("HSV", UINT32, [0, 1, 2]),
        ("LA", INT32, [0, 3]),
        ("RGB", INT32, [0, 1, 2]),
        ("RGBA", INT32, None),
        ("CMYK", INT32, None),
        ("YCbCr", INT32, [0, 1, 2]),
        ("HSV", INT32, [0, 1, 2]),
    ],
)
def test_from_int32array(mode: str, data_tp: DataShape, mask: list[int] | None) -> None:
    """Build an image from an array of packed 32-bit values and compare pixels."""
    n_elements = TEST_IMAGE_SIZE[0] * TEST_IMAGE_SIZE[1] * data_tp.elts_per_pixel

    # Fill the whole array with the same packed value.
    arr = Array([data_tp.elt] * n_elements, type=data_tp.dtype)
    img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)

    _test_img_equals_int32_pyarray(img, arr, mask, data_tp.elts_per_pixel)
251+
252+
253+
@pytest.mark.parametrize(
    "mode, metadata",
    [
        ("LA", ["L", "X", "X", "A"]),
        ("RGB", ["R", "G", "B", "X"]),
        ("RGBX", ["R", "G", "B", "X"]),
        ("RGBA", ["R", "G", "B", "A"]),
        ("CMYK", ["C", "M", "Y", "K"]),
        ("YCbCr", ["Y", "Cb", "Cr", "X"]),
        ("HSV", ["H", "S", "V", "X"]),
    ],
)
def test_image_metadata(mode: str, metadata: list[str]) -> None:
    """Check the band names stored in the exported array's field metadata.

    The value field's metadata must have a non-empty ``b"image"`` entry that
    is UTF-8 JSON with a ``"bands"`` list equal to ``metadata``.
    """
    img = hopper(mode)
    arr = Array(img)  # type: ignore[call-overload]

    field_metadata = arr.type.value_field.metadata
    assert field_metadata

    raw_image_entry = field_metadata[b"image"]
    assert raw_image_entry

    parsed_metadata = json.loads(raw_image_entry.decode("utf8"))

    assert "bands" in parsed_metadata
    assert parsed_metadata["bands"] == metadata

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ optional-dependencies.mic = [
5858
]
5959
optional-dependencies.test-arrow = [
6060
"pyarrow",
61+
"arro3-core",
62+
"arro3-compute",
6163
]
6264

6365
optional-dependencies.tests = [

0 commit comments

Comments
 (0)