Skip to content

Commit a2a4a12

Browse files
committed
Fix VECTOR workaround: use custom metadata key
1 parent 3898cad commit a2a4a12

File tree

3 files changed

+11
-14
lines changed

3 files changed

+11
-14
lines changed

python/databend_udf/udf.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@
4949
MAX_DECIMAL256_PRECISION = 76
5050
EXTENSION_KEY = b"Extension"
5151
ARROW_EXT_TYPE_VARIANT = b"Variant"
52-
ARROW_EXT_TYPE_VECTOR = b"Vector"
52+
# Use a custom metadata key (not EXTENSION_KEY) to avoid triggering the Arrow extension type mechanism in Databend; the value holds the vector dimension.
53+
VECTOR_METADATA_KEY = b"x-databend-vector"
54+
5355

5456
TIMESTAMP_UINT = "us"
5557
_SCHEMA_METADATA_INPUT_COUNT_KEY = b"x-databend-udf-input-count"
@@ -1413,8 +1415,7 @@ def _type_str_to_arrow_field_inner(type_str: str) -> pa.Field:
14131415
pa.list_(pa.field("item", pa.float32(), nullable=True)),
14141416
nullable=False,
14151417
metadata={
1416-
EXTENSION_KEY: ARROW_EXT_TYPE_VECTOR,
1417-
b"vector_size": str(dim).encode(),
1418+
VECTOR_METADATA_KEY: str(dim).encode(),
14181419
},
14191420
)
14201421
else:
@@ -1440,8 +1441,8 @@ def _field_type_to_string(field: pa.Field) -> str:
14401441
Convert a `pyarrow.Field` to a SQL data type string.
14411442
"""
14421443
t = field.type
1443-
if field.metadata and field.metadata.get(EXTENSION_KEY) == ARROW_EXT_TYPE_VECTOR:
1444-
dim = int(field.metadata.get(b"vector_size", b"0"))
1444+
if field.metadata and field.metadata.get(VECTOR_METADATA_KEY):
1445+
dim = int(field.metadata.get(VECTOR_METADATA_KEY))
14451446
return f"VECTOR({dim})"
14461447

14471448
if pa.types.is_boolean(t):

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ classifiers = [
77
description = "Databend UDF Server"
88
license = { text = "Apache-2.0" }
99
name = "databend-udf"
10-
version = "0.2.15"
10+
version = "0.2.16"
1111
readme = "README.md"
1212
requires-python = ">=3.7"
1313
dependencies = [

python/tests/test_vector_type.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ def test_vector_type_parsing():
2525
field = _type_str_to_arrow_field("VECTOR(1024)")
2626
# Should be List type with metadata, not FixedSizeList
2727
assert pa.types.is_list(field.type)
28-
assert field.metadata[b"Extension"] == b"Vector"
29-
assert field.metadata[b"vector_size"] == b"1024"
28+
assert field.metadata[b"x-databend-vector"] == b"1024"
3029
assert pa.types.is_float32(field.type.value_type)
3130
# Default is nullable
3231
assert field.nullable is True
@@ -43,8 +42,7 @@ def test_vector_type_formatting():
4342
pa.list_(pa.field("item", pa.float32(), nullable=True)),
4443
nullable=False,
4544
metadata={
46-
b"Extension": b"Vector",
47-
b"vector_size": b"1024",
45+
b"x-databend-vector": b"1024",
4846
},
4947
)
5048
type_str = _field_type_to_string(field)
@@ -58,8 +56,7 @@ def test_vector_input_processing():
5856
pa.list_(pa.field("item", pa.float32(), nullable=True)),
5957
nullable=False,
6058
metadata={
61-
b"Extension": b"Vector",
62-
b"vector_size": b"3",
59+
b"x-databend-vector": b"3",
6360
},
6461
)
6562
func = _input_process_func(field)
@@ -77,8 +74,7 @@ def test_vector_output_processing():
7774
pa.list_(pa.field("item", pa.float32(), nullable=True)),
7875
nullable=False,
7976
metadata={
80-
b"Extension": b"Vector",
81-
b"vector_size": b"3",
77+
b"x-databend-vector": b"3",
8278
},
8379
)
8480
func = _output_process_func(field)

0 commit comments

Comments
 (0)