|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import json |
| 4 | +from typing import Any, NamedTuple |
| 5 | + |
| 6 | +import pytest |
| 7 | + |
| 8 | +from PIL import Image |
| 9 | + |
| 10 | +from .helper import ( |
| 11 | + assert_deep_equal, |
| 12 | + assert_image_equal, |
| 13 | + hopper, |
| 14 | + is_big_endian, |
| 15 | +) |
| 16 | + |
# TYPE_CHECKING is defined locally as False, so at runtime the else-branch
# below always executes. Presumably static type checkers treat the name as
# true and resolve ``nanoarrow`` through the plain import -- TODO confirm.
TYPE_CHECKING = False
if TYPE_CHECKING:
    import nanoarrow
else:
    # Skip this whole test module when nanoarrow cannot be imported.
    nanoarrow = pytest.importorskip("nanoarrow", reason="Nanoarrow not installed")

# Image size passed to Image.fromarrow() by the from-array tests below.
TEST_IMAGE_SIZE = (10, 10)
| 24 | + |
| 25 | + |
def _test_img_equals_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None, elts_per_pixel: int = 1
) -> None:
    """Spot-check that sampled pixels of ``img`` match the Arrow array ``arr``.

    :param img: Image whose pixels are compared.
    :param arr: Array indexed by flat position; each item must expose
        ``as_py()``.
    :param mask: Indices of the array elements to compare against successive
        pixel components. When ``None`` and ``elts_per_pixel`` is 1, whole
        pixels are compared with ``assert_deep_equal``.
    :param elts_per_pixel: Number of consecutive array items that make up one
        pixel (1 for nested/single-band layouts, >1 for flattened layouts).
    :raises AssertionError: If the lengths or any sampled pixel differ.
    """
    assert img.height * img.width * elts_per_pixel == len(arr)
    px = img.load()
    assert px is not None
    if elts_per_pixel > 1 and mask is None:
        # have to do element-wise comparison when we're comparing
        # flattened r,g,b,a to a pixel.
        mask = list(range(elts_per_pixel))

    # Sample about ten positions per axis. The stride is clamped to 1 so that
    # images smaller than 10 pixels on a side don't hand range() a zero step.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)

    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            offset = y * img.width + x
            if mask:
                pixel = px[x, y]
                assert isinstance(pixel, tuple)
                # pixel is indexed by position in the mask, the array by the
                # mask value itself.
                for ix, elt in enumerate(mask):
                    if elts_per_pixel == 1:
                        assert pixel[ix] == arr[offset].as_py()[elt]
                    else:
                        assert (
                            pixel[ix] == arr[offset * elts_per_pixel + elt].as_py()
                        )
            else:
                assert_deep_equal(px[x, y], arr[offset].as_py())
| 51 | + |
| 52 | + |
def _test_img_equals_int32_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None, elts_per_pixel: int = 1
) -> None:
    """Spot-check ``img`` against an array holding one packed integer per pixel.

    Each sampled array value is split into four byte-sized components, which
    are then compared with the pixel components selected by ``mask``.

    :param img: Image whose pixels are compared.
    :param arr: Array indexed by flat pixel position; each item must expose
        ``as_py()`` returning an integer.
    :param mask: Indices into the unpacked 4-tuple to compare against
        successive pixel components.
    :param elts_per_pixel: Number of array items per pixel; also used to build
        the default mask.
    :raises AssertionError: If the lengths or any sampled pixel differ.
    """
    assert img.height * img.width * elts_per_pixel == len(arr)
    px = img.load()
    assert px is not None
    if mask is None:
        # have to do element-wise comparison when we're comparing
        # flattened rgba in an uint32 to a pixel.
        # NOTE(review): with the default elts_per_pixel of 1 this yields [0],
        # so only the first component is compared -- confirm that is intended.
        mask = list(range(elts_per_pixel))

    # Sample about ten positions per axis. The stride is clamped to 1 so that
    # images smaller than 10 pixels on a side don't hand range() a zero step.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)
    # Byte order is queried once instead of once per sampled pixel.
    big_endian = is_big_endian()

    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            pixel = px[x, y]
            assert isinstance(pixel, tuple)
            arr_pixel_int = arr[y * img.width + x].as_py()
            # Least-significant byte first.
            arr_pixel_tuple = (
                arr_pixel_int % 256,
                (arr_pixel_int // 256) % 256,
                (arr_pixel_int // 256**2) % 256,
                (arr_pixel_int // 256**3),
            )
            if big_endian:
                arr_pixel_tuple = arr_pixel_tuple[::-1]

            for ix, elt in enumerate(mask):
                assert pixel[ix] == arr_pixel_tuple[elt]
| 79 | + |
| 80 | + |
# Non-nullable fixed-size list of four non-nullable uint8 values; used below
# as the expected schema for every multi-band mode.
fl_uint8_4_type = nanoarrow.fixed_size_list(
    value_type=nanoarrow.uint8(nullable=False),
    list_size=4,
    nullable=False,
)
| 84 | + |
| 85 | + |
@pytest.mark.parametrize(
    "mode, dtype, mask",
    (
        ("L", nanoarrow.uint8(nullable=False), None),
        ("I", nanoarrow.int32(nullable=False), None),
        ("F", nanoarrow.float32(nullable=False), None),
        ("LA", fl_uint8_4_type, [0, 3]),
        ("RGB", fl_uint8_4_type, [0, 1, 2]),
        ("RGBA", fl_uint8_4_type, None),
        ("RGBX", fl_uint8_4_type, None),
        ("CMYK", fl_uint8_4_type, None),
        ("YCbCr", fl_uint8_4_type, [0, 1, 2]),
        ("HSV", fl_uint8_4_type, [0, 1, 2]),
    ),
)
def test_to_array(
    mode: str, dtype: nanoarrow.Schema, mask: list[int] | None
) -> None:
    """Export an image to a nanoarrow array and load it back unchanged.

    Checks the sampled pixel values, the exported schema's type and
    nullability against ``dtype``, and that ``Image.fromarrow`` reproduces
    the source image.
    """
    img = hopper(mode)

    # Crop to a non-square size
    img = img.crop((3, 0, 124, 127))
    assert img.size == (121, 127)

    arr = nanoarrow.Array(img)  # type: ignore[call-overload]
    _test_img_equals_pyarray(img, arr, mask)
    assert arr.schema.type == dtype.type
    assert arr.schema.nullable == dtype.nullable

    reloaded = Image.fromarrow(arr, mode, img.size)

    assert reloaded

    assert_image_equal(img, reloaded)
| 118 | + |
| 119 | + |
def test_lifetime() -> None:
    """Arrays exported from an image stay readable after the image is deleted."""
    # valgrind shouldn't error out here.

    source = hopper("L")

    first = nanoarrow.Array(source)  # type: ignore[call-overload]
    second = nanoarrow.Array(source)  # type: ignore[call-overload]

    # Drop the image before touching either array.
    del source

    assert sum(first.iter_py()) > 0
    del first

    assert sum(second.iter_py()) > 0
    del second
| 136 | + |
| 137 | + |
def test_lifetime2() -> None:
    """The image stays usable after the arrays exported from it are deleted."""
    # valgrind shouldn't error out here.

    source = hopper("L")

    first = nanoarrow.Array(source)  # type: ignore[call-overload]
    second = nanoarrow.Array(source)  # type: ignore[call-overload]

    assert sum(first.iter_py()) > 0
    del first

    assert sum(second.iter_py()) > 0
    del second

    # Both arrays are gone; the image must still be copyable and readable.
    duplicate = source.copy()
    access = duplicate.load()
    assert access  # make mypy happy
    assert isinstance(access[0, 0], int)
| 157 | + |
| 158 | + |
class DataShape(NamedTuple):
    """Describe how test data for one image is laid out in an Arrow array."""

    # Arrow schema passed to nanoarrow when the array is built.
    dtype: nanoarrow.Schema
    # Strictly speaking, elt should be a pixel or pixel component, so
    # list[uint8][4], float, int, uint32, uint8, etc. But more
    # correctly, it should be exactly the dtype from the line above.
    elt: Any
    # Number of array elements that together make up one pixel.
    elts_per_pixel: int
| 166 | + |
| 167 | + |
# Nested layout: one fixed-size list of four uint8 values per pixel.
UINT_ARR = DataShape(dtype=fl_uint8_4_type, elt=[1, 2, 3, 4], elts_per_pixel=1)

# Flat layout: a single uint8 value, repeated four times per pixel.
UINT = DataShape(dtype=nanoarrow.uint8(), elt=3, elts_per_pixel=4)

# Packed layout: one integer per pixel. The value is above 0x80000000, so it
# doesn't fit in an int32.
UINT32 = DataShape(dtype=nanoarrow.uint32(), elt=0xABCDEF45, elts_per_pixel=1)

# Packed layout: one integer per pixel, with a value below 0x80000000.
# NOTE(review): despite the name, the dtype here is uint32 -- confirm intended.
INT32 = DataShape(dtype=nanoarrow.uint32(), elt=0x12CDEF45, elts_per_pixel=1)
| 191 | + |
| 192 | + |
@pytest.mark.xfail(
    reason="Support for nested array creation is not available in nanoarrow/python"
)
@pytest.mark.parametrize(
    "mode, data_tp, mask",
    (
        ("L", DataShape(nanoarrow.uint8(), 3, 1), None),
        ("I", DataShape(nanoarrow.int32(), 1 << 24, 1), None),
        ("F", DataShape(nanoarrow.float32(), 3.14159, 1), None),
        ("LA", UINT_ARR, [0, 3]),
        ("LA", UINT, [0, 3]),
        ("RGB", UINT_ARR, [0, 1, 2]),
        ("RGBA", UINT_ARR, None),
        ("CMYK", UINT_ARR, None),
        ("YCbCr", UINT_ARR, [0, 1, 2]),
        ("HSV", UINT_ARR, [0, 1, 2]),
        ("RGB", UINT, [0, 1, 2]),
        ("RGBA", UINT, None),
        ("CMYK", UINT, None),
        ("YCbCr", UINT, [0, 1, 2]),
        ("HSV", UINT, [0, 1, 2]),
    ),
)
def test_fromarray(mode: str, data_tp: DataShape, mask: list[int] | None) -> None:
    """Build an image from a constant-filled array and compare the pixels."""
    dtype, elt, elts_per_pixel = data_tp

    width, height = TEST_IMAGE_SIZE
    n_elts = width * height * elts_per_pixel
    if dtype == fl_uint8_4_type:
        # Apparently there's no good way to create this array from python
        # using nanoarrow
        # https://github.com/apache/arrow-nanoarrow/issues/620
        # the following lines will fail.
        flat = nanoarrow.c_array(elt * n_elts, schema=nanoarrow.uint8())
        arr = nanoarrow.Array(flat, schema=dtype)
    else:
        c_arr = nanoarrow.c_array([elt] * n_elts, schema=dtype)
        arr = nanoarrow.Array(c_arr)
    img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)

    _test_img_equals_pyarray(img, arr, mask, elts_per_pixel)
| 229 | + |
| 230 | + |
@pytest.mark.parametrize(
    "mode, data_tp, mask",
    (
        ("LA", UINT32, [0, 3]),
        ("RGB", UINT32, [0, 1, 2]),
        ("RGBA", UINT32, None),
        ("CMYK", UINT32, None),
        ("YCbCr", UINT32, [0, 1, 2]),
        ("HSV", UINT32, [0, 1, 2]),
        ("LA", INT32, [0, 3]),
        ("RGB", INT32, [0, 1, 2]),
        ("RGBA", INT32, None),
        ("CMYK", INT32, None),
        ("YCbCr", INT32, [0, 1, 2]),
        ("HSV", INT32, [0, 1, 2]),
    ),
)
def test_from_int32array(
    mode: str, data_tp: DataShape, mask: list[int] | None
) -> None:
    """Build an image from packed per-pixel integers and compare the pixels."""
    dtype, elt, elts_per_pixel = data_tp

    width, height = TEST_IMAGE_SIZE
    values = [elt] * (width * height * elts_per_pixel)
    c_arr = nanoarrow.c_array(values, schema=dtype)
    arr = nanoarrow.Array(c_arr)
    img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)

    _test_img_equals_int32_pyarray(img, arr, mask, elts_per_pixel)
| 256 | + |
| 257 | + |
@pytest.mark.parametrize(
    "mode, metadata",
    (
        ("LA", ["L", "X", "X", "A"]),
        ("RGB", ["R", "G", "B", "X"]),
        ("RGBX", ["R", "G", "B", "X"]),
        ("RGBA", ["R", "G", "B", "A"]),
        ("CMYK", ["C", "M", "Y", "K"]),
        ("YCbCr", ["Y", "Cb", "Cr", "X"]),
        ("HSV", ["H", "S", "V", "X"]),
    ),
)
def test_image_nested_metadata(mode: str, metadata: list[str]) -> None:
    """The exported value-type schema carries the band names as JSON metadata."""
    img = hopper(mode)

    arr = nanoarrow.Array(img)  # type: ignore[call-overload]

    value_meta = arr.schema.value_type.metadata
    assert value_meta
    image_entry = value_meta[b"image"]
    assert image_entry

    # The entry is JSON stored as bytes under the b"image" key.
    parsed_metadata = json.loads(image_entry.decode("utf8"))

    assert "bands" in parsed_metadata
    assert parsed_metadata["bands"] == metadata
0 commit comments