Skip to content

Commit 8ae0c04

Browse files
committed
fix: map view types to avoid to_substrait ArrowNotImplementedError
1 parent e7e1748 commit 8ae0c04

File tree

2 files changed

+107
-1
lines changed

2 files changed

+107
-1
lines changed

vortex-python/python/vortex/arrow/expression.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,26 @@ def ensure_vortex_expression(expression: pc.Expression | Expr | None, *, schema:
2828
return expression
2929

3030

31+
def _schema_for_substrait(schema: pa.Schema) -> pa.Schema:
    """Return a copy of ``schema`` that is safe to hand to ``to_substrait``.

    PyArrow's ``to_substrait`` doesn't support view types, so every top-level
    ``string_view`` field is re-typed as ``string`` and every ``binary_view``
    field as ``binary``. All other fields are passed through untouched, and
    the schema-level metadata of the input is carried over to the result.

    Only top-level fields are inspected: a view type nested inside another
    type (e.g. a list or struct) is left as-is.

    Args:
        schema: The Arrow schema the expression will be bound against.

    Returns:
        A schema with the same field names, order, and metadata, in which no
        top-level field has a view type.
    """
    # Workaround for: https://github.com/vortex-data/vortex/issues/5759
    # The original author notes this is safe because Vortex handles the view
    # and non-view variants equivalently.
    # If/when PyArrow's to_substrait supports view types, revert.
    string_view = pa.string_view()
    binary_view = pa.binary_view()

    def _without_view(field: pa.Field) -> pa.Field:
        # with_type keeps the field's name, nullability and field metadata.
        if field.type == string_view:
            return field.with_type(pa.string())
        if field.type == binary_view:
            return field.with_type(pa.binary())
        return field

    # Pass the metadata explicitly: pa.schema(fields) alone would drop the
    # schema-level metadata of the input.
    return pa.schema(
        [_without_view(field) for field in schema],
        metadata=schema.metadata,
    )
45+
46+
3147
def arrow_to_vortex(arrow_expression: pc.Expression, schema: pa.Schema) -> Expr:
48+
compat_schema = _schema_for_substrait(schema)
3249
substrait_object = ExtendedExpression() # pyright: ignore[reportUnknownVariableType]
33-
substrait_object.ParseFromString(arrow_expression.to_substrait(schema)) # pyright: ignore[reportUnknownMemberType]
50+
substrait_object.ParseFromString(arrow_expression.to_substrait(compat_schema)) # pyright: ignore[reportUnknownMemberType]
3451

3552
expressions = extended_expression(substrait_object) # pyright: ignore[reportUnknownArgumentType]
3653

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Tests the _schema_for_substrait workaround in vortex/arrow/expression.py
2+
3+
import pyarrow as pa
4+
import pyarrow.compute as pc
5+
import pytest
6+
7+
from vortex.arrow.expression import arrow_to_vortex, _schema_for_substrait
8+
9+
10+
class TestSchemaForSubstrait:
    """Checks the type mapping applied by ``_schema_for_substrait``.

    Expected mapping: string_view => string, binary_view => binary, and every
    other type is returned unchanged.
    """

    def test_string_view_mapped_to_string(self):
        """A single string_view column comes back typed as string."""
        view_schema = pa.schema([("col", pa.string_view())])
        mapped = _schema_for_substrait(view_schema)
        col_type = mapped.field("col").type
        assert col_type == pa.string()

    def test_binary_view_mapped_to_binary(self):
        """A single binary_view column comes back typed as binary."""
        view_schema = pa.schema([("col", pa.binary_view())])
        mapped = _schema_for_substrait(view_schema)
        col_type = mapped.field("col").type
        assert col_type == pa.binary()

    def test_other_types_unchanged(self):
        """A schema without any view types round-trips to an equal schema."""
        columns = [
            ("int_col", pa.int64()),
            ("str_col", pa.string()),
            ("bin_col", pa.binary()),
            ("float_col", pa.float64()),
        ]
        plain_schema = pa.schema(columns)
        mapped = _schema_for_substrait(plain_schema)
        assert mapped == plain_schema

    def test_mixed_schema(self):
        """View columns are remapped while their non-view neighbours are kept."""
        input_columns = [
            ("sv", pa.string_view()),
            ("bv", pa.binary_view()),
            ("s", pa.string()),
            ("i", pa.int64()),
        ]
        expected_columns = [
            ("sv", pa.string()),
            ("bv", pa.binary()),
            ("s", pa.string()),
            ("i", pa.int64()),
        ]
        mapped = _schema_for_substrait(pa.schema(input_columns))
        wanted = pa.schema(expected_columns)
        assert mapped == wanted
48+
49+
50+
class TestArrowToVortexWithViews:
    """Runs ``arrow_to_vortex`` on comparisons over string_view / binary_view columns.

    Each test only asserts that a conversion result is produced (not ``None``).
    """

    def test_string_view_equality_expression(self):
        """Equality against a str literal on a string_view column converts."""
        predicate = pc.field("name") == "alice"
        name_schema = pa.schema([("name", pa.string_view())])
        converted = arrow_to_vortex(predicate, name_schema)
        assert converted is not None

    def test_binary_view_equality_expression(self):
        """Equality against a bytes literal on a binary_view column converts."""
        predicate = pc.field("data") == b"hello"
        data_schema = pa.schema([("data", pa.binary_view())])
        converted = arrow_to_vortex(predicate, data_schema)
        assert converted is not None

    def test_string_view_comparison_expression(self):
        """A greater-than comparison on a string_view column converts."""
        predicate = pc.field("name") > "bob"
        name_schema = pa.schema([("name", pa.string_view())])
        converted = arrow_to_vortex(predicate, name_schema)
        assert converted is not None

    def test_mixed_view_and_regular_types(self):
        """A conjunction over an int64 column and a string_view column converts."""
        columns = [
            ("id", pa.int64()),
            ("name", pa.string_view()),
            ("data", pa.binary_view()),
        ]
        mixed_schema = pa.schema(columns)
        id_filter = pc.field("id") > 10
        name_filter = pc.field("name") == "test"
        converted = arrow_to_vortex(id_filter & name_filter, mixed_schema)
        assert converted is not None

    @pytest.mark.parametrize(
        ("view_type", "value"),
        [
            (pa.string_view(), "test"),
            (pa.binary_view(), b"test"),
        ],
    )
    def test_view_types_parametrized(self, view_type, value):
        """Equality on a column of each view type, with a matching literal, converts."""
        predicate = pc.field("col") == value
        col_schema = pa.schema([("col", view_type)])
        converted = arrow_to_vortex(predicate, col_schema)
        assert converted is not None

0 commit comments

Comments
 (0)