Skip to content

Commit 85212db

Browse files
committed
Add image band metadata for the 4 channel images
1 parent d560320 commit 85212db

File tree

2 files changed

+112
-1
lines changed

2 files changed

+112
-1
lines changed

Tests/test_pyarrow.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import json
34
from typing import Any, NamedTuple
45

56
import pytest
@@ -244,3 +245,29 @@ def test_from_int32array(mode: str, data_tp: DataShape, mask: list[int] | None)
244245
img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)
245246

246247
_test_img_equals_int32_pyarray(img, arr, mask, elts_per_pixel)
248+
249+
250+
@pytest.mark.parametrize(
    "mode, metadata",
    (
        ("LA", ["L", "X", "X", "A"]),
        ("RGB", ["R", "G", "B", "X"]),
        ("RGBX", ["R", "G", "B", "X"]),
        ("RGBA", ["R", "G", "B", "A"]),
        ("CMYK", ["C", "M", "Y", "K"]),
        ("YCbCr", ["Y", "Cb", "Cr", "X"]),
        ("HSV", ["H", "S", "V", "X"]),
    ),
)
def test_image_metadata(mode: str, metadata: list[str]) -> None:
    """Check the band names exposed in the Arrow child-field metadata.

    The image is converted with ``pyarrow.array`` and the JSON stored under
    the ``b"image"`` key of field 0 must list the expected ``bands``.
    """
    arrow_array = pyarrow.array(hopper(mode))  # type: ignore[call-overload]

    # The metadata lives on the child field, not on the top-level type.
    field_metadata = arrow_array.type.field(0).metadata
    assert field_metadata

    raw_image_json = field_metadata[b"image"]
    assert raw_image_json

    decoded = json.loads(raw_image_json.decode("utf8"))

    assert "bands" in decoded
    assert decoded["bands"] == metadata

src/libImaging/Arrow.c

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,77 @@ ReleaseExportedSchema(struct ArrowSchema *array) {
5555
// Mark array released
5656
array->release = NULL;
5757
}
58+
char *
59+
image_band_json(Imaging im) {
60+
char *format = "{\"bands\": [\"%s\", \"%s\", \"%s\", \"%s\"]}";
61+
char *json;
62+
// Bands can be 4 bands * 2 characters each
63+
int len = strlen(format) + 8 + 1;
64+
int err;
65+
66+
json = calloc(1, len);
67+
68+
if (!json) {
69+
return NULL;
70+
}
71+
72+
err = PyOS_snprintf(
73+
json,
74+
len,
75+
format,
76+
im->band_names[0],
77+
im->band_names[1],
78+
im->band_names[2],
79+
im->band_names[3]
80+
);
81+
if (err < 0) {
82+
return NULL;
83+
}
84+
return json;
85+
}
86+
87+
char *
88+
assemble_metadata(const char *band_json) {
89+
/* format is
90+
int32: number of key/value pairs (noted N below)
91+
int32: byte length of key 0
92+
key 0 (not null-terminated)
93+
int32: byte length of value 0
94+
value 0 (not null-terminated)
95+
...
96+
int32: byte length of key N - 1
97+
key N - 1 (not null-terminated)
98+
int32: byte length of value N - 1
99+
value N - 1 (not null-terminated)
100+
*/
101+
const char *key = "image";
102+
INT32 key_len = strlen(key);
103+
INT32 band_json_len = strlen(band_json);
104+
105+
char *buf;
106+
INT32 *dest_int;
107+
char *dest;
108+
109+
buf = calloc(1, key_len + band_json_len + 4 + 1 * 8);
110+
if (!buf) {
111+
return NULL;
112+
}
113+
114+
dest_int = (void *)buf;
115+
116+
dest_int[0] = 1;
117+
dest_int[1] = key_len;
118+
dest_int += 2;
119+
dest = (void *)dest_int;
120+
memcpy(dest, key, key_len);
121+
dest += key_len;
122+
dest_int = (void *)dest;
123+
dest_int[0] = band_json_len;
124+
dest_int += 1;
125+
memcpy(dest_int, band_json, band_json_len);
126+
127+
return buf;
128+
}
58129

59130
int
60131
export_named_type(struct ArrowSchema *schema, char *format, char *name) {
@@ -95,6 +166,8 @@ export_named_type(struct ArrowSchema *schema, char *format, char *name) {
95166
int
96167
export_imaging_schema(Imaging im, struct ArrowSchema *schema) {
97168
int retval = 0;
169+
char *metadata;
170+
char *band_json;
98171

99172
if (strcmp(im->arrow_band_format, "") == 0) {
100173
return IMAGING_ARROW_INCOMPATIBLE_MODE;
@@ -117,13 +190,24 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) {
117190
schema->n_children = 1;
118191
schema->children = calloc(1, sizeof(struct ArrowSchema *));
119192
schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema));
120-
retval = export_named_type(schema->children[0], im->arrow_band_format, "pixel");
193+
retval = export_named_type(schema->children[0], im->arrow_band_format, im->mode);
121194
if (retval != 0) {
122195
free(schema->children[0]);
123196
free(schema->children);
124197
schema->release(schema);
125198
return retval;
126199
}
200+
201+
// band related metadata
202+
band_json = image_band_json(im);
203+
if (band_json) {
204+
// adding the metadata to the child array.
205+
// Accessible in pyarrow via pa.array(img).type.field(0).metadata
206+
// adding it to the top level is not accessible.
207+
schema->children[0]->metadata = assemble_metadata(band_json);
208+
free(band_json);
209+
}
210+
127211
return 0;
128212
}
129213

0 commit comments

Comments
 (0)