Skip to content

Commit 28c7645

Browse files
committed
Added tests for integration with nanoarrow
1 parent 1a02d4e commit 28c7645

File tree

2 files changed

+282
-0
lines changed

2 files changed

+282
-0
lines changed

Tests/test_nanoarrow.py

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from typing import Any, NamedTuple
5+
6+
import pytest
7+
8+
from PIL import Image
9+
10+
from .helper import (
11+
assert_deep_equal,
12+
assert_image_equal,
13+
hopper,
14+
is_big_endian,
15+
)
16+
17+
TYPE_CHECKING = False
18+
if TYPE_CHECKING:
19+
import nanoarrow
20+
else:
21+
nanoarrow = pytest.importorskip("nanoarrow", reason="Nanoarrow not installed")
22+
23+
# (width, height) handed to Image.fromarrow by the from-arrow tests below;
# the arrays they build hold width * height pixels.
TEST_IMAGE_SIZE = (10, 10)
24+
25+
26+
def _test_img_equals_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None, elts_per_pixel: int = 1
) -> None:
    """Spot-check that ``arr`` holds the same pixel data as ``img``.

    Only a grid of roughly ten samples per axis is compared, not every pixel.

    :param img: Image whose pixels are the reference values.
    :param arr: Array supporting ``len()`` and integer indexing that yields
        scalars exposing ``as_py()``.
    :param mask: For each image band, in order, the index of the matching
        array element. ``None`` compares whole pixels, or every element in
        order when ``elts_per_pixel`` is greater than one.
    :param elts_per_pixel: Number of consecutive array entries that make up
        one pixel.
    """
    assert img.height * img.width * elts_per_pixel == len(arr)
    px = img.load()
    assert px is not None
    if elts_per_pixel > 1 and mask is None:
        # have to do element-wise comparison when we're comparing
        # flattened r,g,b,a to a pixel.
        mask = list(range(elts_per_pixel))
    # Sample about ten positions per axis. The step is clamped to 1 because
    # range() rejects a zero step, which the unclamped value would be for
    # any dimension smaller than ten pixels.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)
    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            if mask:
                pixel = px[x, y]
                assert isinstance(pixel, tuple)
                for ix, elt in enumerate(mask):
                    if elts_per_pixel == 1:
                        # one nested entry per pixel: pick the band out of it
                        assert pixel[ix] == arr[y * img.width + x].as_py()[elt]
                    else:
                        # flattened array: the band is its own entry
                        assert (
                            pixel[ix]
                            == arr[(y * img.width + x) * elts_per_pixel + elt].as_py()
                        )
            else:
                assert_deep_equal(px[x, y], arr[y * img.width + x].as_py())
51+
52+
53+
def _test_img_equals_int32_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None, elts_per_pixel: int = 1
) -> None:
    """Spot-check ``img`` against an array of pixels packed into 32-bit ints.

    Each sampled array entry is split into its four bytes, which are then
    compared with the bands of the matching image pixel. Only a grid of
    roughly ten samples per axis is compared, not every pixel.

    :param img: Image whose pixels are the reference values.
    :param arr: Array supporting ``len()`` and integer indexing that yields
        scalars whose ``as_py()`` returns an int.
    :param mask: For each image band, in order, the index of the matching
        byte. ``None`` compares every band of the pixel with the byte at the
        same position.
    :param elts_per_pixel: Number of array entries per pixel; only used for
        the length check.
    """
    assert img.height * img.width * elts_per_pixel == len(arr)
    px = img.load()
    assert px is not None
    # Clamp the step to 1: range() rejects a zero step, which the unclamped
    # value would be for any dimension smaller than ten pixels.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)
    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            pixel = px[x, y]
            assert isinstance(pixel, tuple)
            arr_pixel_int = arr[y * img.width + x].as_py()
            # Least significant byte first. Every byte, including the top
            # one, is reduced modulo 256 so that a negative (signed) value
            # still yields bytes in 0..255.
            arr_pixel_tuple = (
                arr_pixel_int % 256,
                (arr_pixel_int // 256) % 256,
                (arr_pixel_int // 256**2) % 256,
                (arr_pixel_int // 256**3) % 256,
            )
            if is_big_endian():
                arr_pixel_tuple = arr_pixel_tuple[::-1]

            # Without a mask, compare all bands of the pixel. Deriving the
            # default from elts_per_pixel (1 for packed arrays) would only
            # ever check the first band.
            bands = range(len(pixel)) if mask is None else mask
            for ix, elt in enumerate(bands):
                assert pixel[ix] == arr_pixel_tuple[elt]
79+
80+
81+
# Schema for a non-nullable fixed-size list of four non-nullable uint8 values.
fl_uint8_4_type = nanoarrow.fixed_size_list(
    value_type=nanoarrow.uint8(nullable=False),
    list_size=4,
    nullable=False,
)
84+
85+
86+
@pytest.mark.parametrize(
    "mode, dtype, mask",
    (
        ("L", nanoarrow.uint8(nullable=False), None),
        ("I", nanoarrow.int32(nullable=False), None),
        ("F", nanoarrow.float32(nullable=False), None),
        ("LA", fl_uint8_4_type, [0, 3]),
        ("RGB", fl_uint8_4_type, [0, 1, 2]),
        ("RGBA", fl_uint8_4_type, None),
        ("RGBX", fl_uint8_4_type, None),
        ("CMYK", fl_uint8_4_type, None),
        ("YCbCr", fl_uint8_4_type, [0, 1, 2]),
        ("HSV", fl_uint8_4_type, [0, 1, 2]),
    ),
)
def test_to_array(mode: str, dtype: nanoarrow.Schema, mask: list[int] | None) -> None:
    """Export an image to a nanoarrow array and load it back unchanged.

    :param mode: Image mode of the test image.
    :param dtype: Schema whose ``type`` and ``nullable`` the exported array's
        schema must match.
    :param mask: Band-to-element mapping passed to the pixel comparison.
    """
    img = hopper(mode)

    # Crop to a non-square size, so that width and height differ
    img = img.crop((3, 0, 124, 127))
    assert img.size == (121, 127)

    arr = nanoarrow.Array(img)  # type: ignore[call-overload]
    _test_img_equals_pyarray(img, arr, mask)
    assert arr.schema.type == dtype.type
    assert arr.schema.nullable == dtype.nullable

    reloaded = Image.fromarrow(arr, mode, img.size)

    assert reloaded

    assert_image_equal(img, reloaded)
118+
119+
120+
def test_lifetime() -> None:
    # This should run cleanly under valgrind: both exported arrays
    # have to stay readable once the source image has been deleted.

    source = hopper("L")

    first = nanoarrow.Array(source)  # type: ignore[call-overload]
    second = nanoarrow.Array(source)  # type: ignore[call-overload]

    del source

    first_total = sum(first.iter_py())
    assert first_total > 0
    del first

    second_total = sum(second.iter_py())
    assert second_total > 0
    del second
136+
137+
138+
def test_lifetime2() -> None:
    # valgrind shouldn't error out here.
    # img should remain after the arrays are collected.

    img = hopper("L")

    arr_1 = nanoarrow.Array(img)  # type: ignore[call-overload]
    arr_2 = nanoarrow.Array(img)  # type: ignore[call-overload]

    assert sum(arr_1.iter_py()) > 0
    del arr_1

    assert sum(arr_2.iter_py()) > 0
    del arr_2

    # Both arrays are gone; the image must still be usable.
    img2 = img.copy()
    px = img2.load()
    # Explicit None check (rather than truthiness) to narrow the type for
    # mypy, matching the other helpers in this file.
    assert px is not None
    assert isinstance(px[0, 0], int)
157+
158+
159+
class DataShape(NamedTuple):
    """Describe the arrow array that a from-arrow test case builds.

    Unpacks as ``(dtype, elt, elts_per_pixel)``.
    """

    # nanoarrow schema used to build the array
    dtype: nanoarrow.Schema
    # Strictly speaking, elt should be a pixel or pixel component, so
    # list[uint8][4], float, int, uint32, uint8, etc. But more
    # correctly, it should be exactly the dtype from the line above.
    elt: Any
    # how many array entries make up a single pixel
    elts_per_pixel: int
166+
167+
168+
# Nested layout: a single entry per pixel, each one a list of four uint8.
UINT_ARR = DataShape(fl_uint8_4_type, [1, 2, 3, 4], 1)

# Flat layout: plain uint8 entries, four consecutive ones per pixel.
UINT = DataShape(nanoarrow.uint8(), 3, 4)

# Packed layout: one 32-bit entry per pixel. 0xABCDEF45 is above 0x80000000,
# so it does not fit in a signed int32.
UINT32 = DataShape(nanoarrow.uint32(), 0xABCDEF45, 1)

# Packed layout: one 32-bit entry per pixel, with a value below 0x80000000.
# NOTE(review): despite the name, the dtype here is uint32 - confirm intended.
INT32 = DataShape(nanoarrow.uint32(), 0x12CDEF45, 1)
191+
192+
193+
@pytest.mark.xfail(
    reason="Support for nested array creation is not available in nanoarrow/python"
)
@pytest.mark.parametrize(
    "mode, data_tp, mask",
    (
        ("L", DataShape(nanoarrow.uint8(), 3, 1), None),
        ("I", DataShape(nanoarrow.int32(), 1 << 24, 1), None),
        ("F", DataShape(nanoarrow.float32(), 3.14159, 1), None),
        ("LA", UINT_ARR, [0, 3]),
        ("LA", UINT, [0, 3]),
        ("RGB", UINT_ARR, [0, 1, 2]),
        ("RGBA", UINT_ARR, None),
        ("CMYK", UINT_ARR, None),
        ("YCbCr", UINT_ARR, [0, 1, 2]),
        ("HSV", UINT_ARR, [0, 1, 2]),
        ("RGB", UINT, [0, 1, 2]),
        ("RGBA", UINT, None),
        ("CMYK", UINT, None),
        ("YCbCr", UINT, [0, 1, 2]),
        ("HSV", UINT, [0, 1, 2]),
    ),
)
def test_fromarray(mode: str, data_tp: DataShape, mask: list[int] | None) -> None:
    """Load an image from an array of one repeated element and compare them."""
    dtype = data_tp.dtype
    elt = data_tp.elt
    elts_per_pixel = data_tp.elts_per_pixel

    width, height = TEST_IMAGE_SIZE
    n_entries = width * height * elts_per_pixel
    if dtype == fl_uint8_4_type:
        # nanoarrow apparently has no good way to create this nested array
        # from Python, see
        # https://github.com/apache/arrow-nanoarrow/issues/620
        # so the next two statements are expected to fail.
        flat_arr = nanoarrow.c_array(elt * n_entries, schema=nanoarrow.uint8())
        arr = nanoarrow.Array(flat_arr, schema=dtype)
    else:
        repeated = nanoarrow.c_array([elt] * n_entries, schema=dtype)
        arr = nanoarrow.Array(repeated)
    img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)

    _test_img_equals_pyarray(img, arr, mask, elts_per_pixel)
229+
230+
231+
@pytest.mark.parametrize(
    "mode, data_tp, mask",
    (
        ("LA", UINT32, [0, 3]),
        ("RGB", UINT32, [0, 1, 2]),
        ("RGBA", UINT32, None),
        ("CMYK", UINT32, None),
        ("YCbCr", UINT32, [0, 1, 2]),
        ("HSV", UINT32, [0, 1, 2]),
        ("LA", INT32, [0, 3]),
        ("RGB", INT32, [0, 1, 2]),
        ("RGBA", INT32, None),
        ("CMYK", INT32, None),
        ("YCbCr", INT32, [0, 1, 2]),
        ("HSV", INT32, [0, 1, 2]),
    ),
)
def test_from_int32array(
    mode: str, data_tp: DataShape, mask: list[int] | None
) -> None:
    """Load an image from an array of packed 32-bit pixels and compare them."""
    dtype = data_tp.dtype
    elt = data_tp.elt
    elts_per_pixel = data_tp.elts_per_pixel

    width, height = TEST_IMAGE_SIZE
    n_entries = width * height * elts_per_pixel
    packed = nanoarrow.c_array([elt] * n_entries, schema=dtype)
    arr = nanoarrow.Array(packed)
    img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)

    _test_img_equals_int32_pyarray(img, arr, mask, elts_per_pixel)
256+
257+
258+
@pytest.mark.parametrize(
    "mode, metadata",
    (
        ("LA", ["L", "X", "X", "A"]),
        ("RGB", ["R", "G", "B", "X"]),
        ("RGBX", ["R", "G", "B", "X"]),
        ("RGBA", ["R", "G", "B", "A"]),
        ("CMYK", ["C", "M", "Y", "K"]),
        ("YCbCr", ["Y", "Cb", "Cr", "X"]),
        ("HSV", ["H", "S", "V", "X"]),
    ),
)
def test_image_nested_metadata(mode: str, metadata: list[str]) -> None:
    """Check the JSON "image" metadata on the exported array's value type."""
    img = hopper(mode)

    arr = nanoarrow.Array(img)  # type: ignore[call-overload]

    value_metadata = arr.schema.value_type.metadata
    assert value_metadata
    image_entry = value_metadata[b"image"]
    assert image_entry

    parsed_metadata = json.loads(image_entry.decode("utf8"))

    assert "bands" in parsed_metadata
    assert parsed_metadata["bands"] == metadata

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ optional-dependencies.test-arrow = [
6060
"arro3-compute",
6161
"arro3-core",
6262
"pyarrow",
63+
"nanoarrow",
6364
]
6465

6566
optional-dependencies.tests = [

0 commit comments

Comments
 (0)