From 92943cc2df4787761792b56c75a988da9bb9e5f0 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 4 Oct 2025 16:26:04 +0200 Subject: [PATCH 1/3] patch hugeint to polars decimal --- duckdb/polars_io.py | 9 +++++++-- tests/fast/arrow/test_polars.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/duckdb/polars_io.py b/duckdb/polars_io.py index f43e0afd..b1c1e719 100644 --- a/duckdb/polars_io.py +++ b/duckdb/polars_io.py @@ -176,9 +176,14 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str: if dtype.startswith("{'Decimal'") or dtype == "Decimal": decimal_value = value["Decimal"] assert isinstance(decimal_value, list), ( - f"A {dtype} should be a two member list but got {type(decimal_value)}" + f"A {dtype} should be a two or three member list but got {type(decimal_value)}" ) - return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])) + if len(decimal_value) == 2: # pre-polars 1.34.0 + return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])) + assert len(decimal_value) == 3, ( # since polars 1.34.0 + f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list" + ) + return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[2])) # Datetime with microseconds since epoch if dtype.startswith("{'Datetime'") or dtype == "Datetime": diff --git a/tests/fast/arrow/test_polars.py b/tests/fast/arrow/test_polars.py index 705532c8..9b354e7a 100644 --- a/tests/fast/arrow/test_polars.py +++ b/tests/fast/arrow/test_polars.py @@ -175,7 +175,7 @@ def test_polars_column_with_tricky_name(self, duckdb_cursor): "UBIGINT", "FLOAT", "DOUBLE", - # "HUGEINT", + "HUGEINT", "DECIMAL(4,1)", "DECIMAL(9,1)", "DECIMAL(18,4)", From 252a35de4f07e0c2f6351968d6ca0a02793a10ff Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 4 Oct 2025 19:18:05 +0200 Subject: [PATCH 2/3] tests --- duckdb/polars_io.py | 10 +++---- tests/fast/arrow/test_polars.py | 53 ++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/duckdb/polars_io.py b/duckdb/polars_io.py index b1c1e719..1eddb593 100644 --- a/duckdb/polars_io.py +++ b/duckdb/polars_io.py @@ -1,5 +1,6 @@ from __future__ import annotations # noqa: D100 +import contextlib import datetime import json import typing @@ -178,12 +179,10 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str: assert isinstance(decimal_value, list), ( f"A {dtype} should be a two or three member list but got {type(decimal_value)}" ) - if len(decimal_value) == 2: # pre-polars 1.34.0 - return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])) - assert len(decimal_value) == 3, ( # since polars 1.34.0 + assert 2 >= len(decimal_value) <= 3, ( f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list" ) - return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[2])) + return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1])) # Datetime with microseconds since epoch if dtype.startswith("{'Datetime'") or dtype == "Datetime": @@ -265,7 +264,8 @@ def source_generator( relation_final = relation_final.limit(n_rows) if predicate is not None: # We have a predicate, if possible, we push it down to DuckDB - duck_predicate = _predicate_to_expression(predicate) + with contextlib.suppress(AssertionError, KeyError): + duck_predicate = _predicate_to_expression(predicate) # Try to pushdown filter, if one exists if duck_predicate is not None: relation_final = relation_final.filter(duck_predicate) diff --git a/tests/fast/arrow/test_polars.py b/tests/fast/arrow/test_polars.py index 9b354e7a..902dd072 100644 --- a/tests/fast/arrow/test_polars.py +++ b/tests/fast/arrow/test_polars.py @@ -1,4 +1,5 @@ import datetime +import json import pytest @@ -8,7 +9,7 @@ arrow = pytest.importorskip("pyarrow") pl_testing = pytest.importorskip("polars.testing") -from duckdb.polars_io import _predicate_to_expression # noqa: E402 +from duckdb.polars_io import _pl_tree_to_sql, _predicate_to_expression # noqa: E402 def valid_filter(filter): @@ -605,3 +606,53 @@ def test_polars_lazy_many_batches(self, duckdb_cursor): correct = duckdb_cursor.execute("FROM t").fetchall() assert res == correct + + def test_invalid_expr_json(self): + bad_key_expr = """ + { + "BinaryExpr": { + "left": { "Column": "foo" }, + "middle": "Gt", + "right": { "Literal": { "Int": 5 } } + } + } + """ + with pytest.raises(KeyError, match="'op'"): + _pl_tree_to_sql(json.loads(bad_key_expr)) + + bad_type_expr = """ + { + "BinaryExpr": { + "left": { "Column": [ "foo" ] }, + "op": "Gt", + "right": { "Literal": { "Int": 5 } } + } + } + """ + with pytest.raises(AssertionError, match="The col name of a Column should be a str but got"): + _pl_tree_to_sql(json.loads(bad_type_expr)) + + def test_old_dec(self): + bad_key_expr = """ + { + "BinaryExpr": { + "left": { "Column": "foo" }, + "middle": "Gt", + "right": { "Literal": { "Int": 5 } } + } + } + """ + with pytest.raises(KeyError, match="'op'"): + _pl_tree_to_sql(json.loads(bad_key_expr)) + + bad_type_expr = """ + { + "BinaryExpr": { + "left": { "Column": [ "foo" ] }, + "op": "Gt", + "right": { "Literal": { "Int": 5 } } + } + } + """ + with pytest.raises(AssertionError, match="The col name of a Column should be a str but got"): + _pl_tree_to_sql(json.loads(bad_type_expr)) From dca3ccbd29e43695af6fcff61f271bae62f4be47 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 4 Oct 2025 19:25:22 +0200 Subject: [PATCH 3/3] tests --- duckdb/polars_io.py | 2 +- tests/fast/arrow/test_polars.py | 41 +++++++++++++++------------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/duckdb/polars_io.py b/duckdb/polars_io.py index 1eddb593..61ead5bc 100644 --- a/duckdb/polars_io.py +++ b/duckdb/polars_io.py @@ -179,7 +179,7 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str: assert isinstance(decimal_value, list), ( f"A {dtype} should be a two or three member list but got {type(decimal_value)}" ) - assert 2 >= len(decimal_value) <= 3, ( + assert 2 <= len(decimal_value) <= 3, ( f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list" ) return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1])) diff --git a/tests/fast/arrow/test_polars.py b/tests/fast/arrow/test_polars.py index 902dd072..d5621701 100644 --- a/tests/fast/arrow/test_polars.py +++ b/tests/fast/arrow/test_polars.py @@ -632,27 +632,24 @@ def test_invalid_expr_json(self): with pytest.raises(AssertionError, match="The col name of a Column should be a str but got"): _pl_tree_to_sql(json.loads(bad_type_expr)) - def test_old_dec(self): - bad_key_expr = """ - { - "BinaryExpr": { - "left": { "Column": "foo" }, - "middle": "Gt", - "right": { "Literal": { "Int": 5 } } - } - } + def test_decimal_scale(self): + scalar_decimal_no_scale = """ + { "Scalar": { + "Decimal": [ + 1, + 0 + ] + } } """ - with pytest.raises(KeyError, match="'op'"): - _pl_tree_to_sql(json.loads(bad_key_expr)) - - bad_type_expr = """ - { - "BinaryExpr": { - "left": { "Column": [ "foo" ] }, - "op": "Gt", - "right": { "Literal": { "Int": 5 } } - } - } + assert _pl_tree_to_sql(json.loads(scalar_decimal_no_scale)) == "1" + + scalar_decimal_scale = """ + { "Scalar": { + "Decimal": [ + 1, + 38, + 0 + ] + } } """ - with pytest.raises(AssertionError, match="The col name of a Column should be a str but got"): - _pl_tree_to_sql(json.loads(bad_type_expr)) + assert _pl_tree_to_sql(json.loads(scalar_decimal_scale)) == "1"