Skip to content

Commit 6a43281

Browse files
committed
Fix VECTOR type: use FixedSizeList with Extension metadata
1 parent 60f3533 commit 6a43281

File tree

3 files changed

+12
-11
lines changed

3 files changed

+12
-11
lines changed

python/databend_udf/udf.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,10 +1408,10 @@ def _type_str_to_arrow_field_inner(type_str: str) -> pa.Field:
14081408
# VECTOR(1024)
14091409
dim = int(type_str[6:].strip("()").strip())
14101410
# Use List(float) with metadata to represent VECTOR(N)
1411-
# This is a workaround because Databend UDF client might not support FixedSizeList yet.
1411+
# Use FixedSizeList with Extension metadata for VECTOR type
14121412
return pa.field(
14131413
"",
1414-
pa.list_(pa.field("item", pa.float32(), nullable=True)),
1414+
pa.list_(pa.field("item", pa.float32(), nullable=True), list_size=dim),
14151415
nullable=False,
14161416
metadata={
14171417
EXTENSION_KEY: ARROW_EXT_TYPE_VECTOR,

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ classifiers = [
77
description = "Databend UDF Server"
88
license = { text = "Apache-2.0" }
99
name = "databend-udf"
10-
version = "0.2.17"
10+
version = "0.2.18"
1111
readme = "README.md"
1212
requires-python = ">=3.7"
1313
dependencies = [

python/tests/test_vector_type.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,9 @@ def test_vector_sql_generation():
2323

2424
def test_vector_type_parsing():
2525
field = _type_str_to_arrow_field("VECTOR(1024)")
26-
# Should be List type with metadata, not FixedSizeList
27-
assert pa.types.is_list(field.type)
26+
# Should be FixedSizeList type with metadata
27+
assert pa.types.is_fixed_size_list(field.type)
28+
assert field.type.list_size == 1024
2829
assert field.metadata[b"Extension"] == b"Vector"
2930
assert field.metadata[b"vector_size"] == b"1024"
3031
assert pa.types.is_float32(field.type.value_type)
@@ -37,10 +38,10 @@ def test_vector_type_parsing():
3738

3839

3940
def test_vector_type_formatting():
40-
# Test that a List with VECTOR metadata is formatted as VECTOR(N)
41+
# Test that a FixedSizeList with VECTOR metadata is formatted as VECTOR(N)
4142
field = pa.field(
4243
"",
43-
pa.list_(pa.field("item", pa.float32(), nullable=True)),
44+
pa.list_(pa.field("item", pa.float32(), nullable=True), list_size=1024),
4445
nullable=False,
4546
metadata={
4647
b"Extension": b"Vector",
@@ -52,10 +53,10 @@ def test_vector_type_formatting():
5253

5354

5455
def test_vector_input_processing():
55-
# Input processing should handle List (which is what VECTOR is physically)
56+
# Input processing should handle FixedSizeList
5657
field = pa.field(
5758
"",
58-
pa.list_(pa.field("item", pa.float32(), nullable=True)),
59+
pa.list_(pa.field("item", pa.float32(), nullable=True), list_size=3),
5960
nullable=False,
6061
metadata={
6162
b"Extension": b"Vector",
@@ -71,10 +72,10 @@ def test_vector_input_processing():
7172

7273

7374
def test_vector_output_processing():
74-
# Output processing should handle List
75+
# Output processing should handle FixedSizeList
7576
field = pa.field(
7677
"",
77-
pa.list_(pa.field("item", pa.float32(), nullable=True)),
78+
pa.list_(pa.field("item", pa.float32(), nullable=True), list_size=3),
7879
nullable=False,
7980
metadata={
8081
b"Extension": b"Vector",

0 commit comments

Comments
 (0)