|
| 1 | +# Tests the _schema_for_substrait workaround in vortex/arrow/expression.py |
| 2 | + |
| 3 | +import pyarrow as pa |
| 4 | +import pyarrow.compute as pc |
| 5 | +import pytest |
| 6 | + |
| 7 | +from vortex.arrow.expression import arrow_to_vortex, _schema_for_substrait |
| 8 | + |
| 9 | + |
class TestSchemaForSubstrait:
    """Checks the schema rewrite performed by ``_schema_for_substrait``.

    Expected mapping, as asserted below: ``string_view`` becomes ``string``,
    ``binary_view`` becomes ``binary``, and every other type is left as is.
    """

    def test_string_view_mapped_to_string(self):
        # A lone string_view column should come back typed as plain string.
        view_schema = pa.schema([("col", pa.string_view())])
        converted_type = _schema_for_substrait(view_schema).field("col").type
        assert converted_type == pa.string()

    def test_binary_view_mapped_to_binary(self):
        # A lone binary_view column should come back typed as plain binary.
        view_schema = pa.schema([("col", pa.binary_view())])
        converted_type = _schema_for_substrait(view_schema).field("col").type
        assert converted_type == pa.binary()

    def test_other_types_unchanged(self):
        # None of these are view types, so the schema must round-trip intact.
        plain_fields = [
            ("int_col", pa.int64()),
            ("str_col", pa.string()),
            ("bin_col", pa.binary()),
            ("float_col", pa.float64()),
        ]
        plain_schema = pa.schema(plain_fields)
        assert _schema_for_substrait(plain_schema) == plain_schema

    def test_mixed_schema(self):
        # Only the two view columns change; names and field order are kept.
        input_fields = [
            ("sv", pa.string_view()),
            ("bv", pa.binary_view()),
            ("s", pa.string()),
            ("i", pa.int64()),
        ]
        expected_fields = [
            ("sv", pa.string()),
            ("bv", pa.binary()),
            ("s", pa.string()),
            ("i", pa.int64()),
        ]
        converted = _schema_for_substrait(pa.schema(input_fields))
        assert converted == pa.schema(expected_fields)
| 48 | + |
| 49 | + |
class TestArrowToVortexWithViews:
    """Converts comparison expressions over string_view / binary_view columns.

    Each test only asserts that ``arrow_to_vortex`` yields a non-None result
    for the given expression and schema.
    """

    def test_string_view_equality_expression(self):
        # Equality between a string_view column and a str literal.
        view_schema = pa.schema([("name", pa.string_view())])
        predicate = pc.field("name") == "alice"
        assert arrow_to_vortex(predicate, view_schema) is not None

    def test_binary_view_equality_expression(self):
        # Equality between a binary_view column and a bytes literal.
        view_schema = pa.schema([("data", pa.binary_view())])
        predicate = pc.field("data") == b"hello"
        assert arrow_to_vortex(predicate, view_schema) is not None

    def test_string_view_comparison_expression(self):
        # Ordering comparison (greater-than) on a string_view column.
        view_schema = pa.schema([("name", pa.string_view())])
        predicate = pc.field("name") > "bob"
        assert arrow_to_vortex(predicate, view_schema) is not None

    def test_mixed_view_and_regular_types(self):
        # Schema mixes a regular int64 column with both view types; the
        # predicate references the int64 and string_view columns.
        fields = [
            ("id", pa.int64()),
            ("name", pa.string_view()),
            ("data", pa.binary_view()),
        ]
        id_filter = pc.field("id") > 10
        name_filter = pc.field("name") == "test"
        converted = arrow_to_vortex(id_filter & name_filter, pa.schema(fields))
        assert converted is not None

    @pytest.mark.parametrize(
        "view_type,value",
        [
            (pa.string_view(), "test"),
            (pa.binary_view(), b"test"),
        ],
    )
    def test_view_types_parametrized(self, view_type, value):
        # Same equality check, run once per view type with a matching literal.
        view_schema = pa.schema([("col", view_type)])
        predicate = pc.field("col") == value
        assert arrow_to_vortex(predicate, view_schema) is not None
0 commit comments