Skip to content

Commit 8e47663

Browse files
author
Evert
committed
Parity with v1.3.1
1 parent 057c560 commit 8e47663

File tree

8 files changed

+196
-64
lines changed

8 files changed

+196
-64
lines changed

external/duckdb

Submodule duckdb updated 266 files

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ requires = [
6868
]
6969

7070
[tool.scikit-build]
71-
logging.level = "DEBUG"
7271
minimum-version = "0.10"
7372
cmake.version = ">=3.29.0"
7473
ninja.version = ">=1.10"

src/duckdb_py/arrow/arrow_array_stream.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,10 @@ py::object TransformFilterRecursive(TableFilter &filter, vector<string> column_r
447447
}
448448
return TransformFilterRecursive(or_filter, column_ref, timezone_config, type);
449449
}
450+
case TableFilterType::DYNAMIC_FILTER: {
451+
//! Ignore dynamic filters for now, not necessary for correctness
452+
return py::none();
453+
}
450454
default:
451455
throw NotImplementedException("Pushdown Filter Type %s is not currently supported in PyArrow Scans",
452456
EnumUtil::ToString(filter.filter_type));

tests/fast/arrow/test_arrow_decimal_32_64.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
class TestArrowDecimalTypes(object):
99
def test_decimal_32(self, duckdb_cursor):
1010
duckdb_cursor = duckdb.connect()
11+
duckdb_cursor.execute('SET arrow_output_version = 1.5')
1112
decimal_32 = pa.Table.from_pylist(
1213
[
1314
{"data": Decimal("100.20")},
@@ -36,6 +37,7 @@ def test_decimal_32(self, duckdb_cursor):
3637

3738
def test_decimal_64(self, duckdb_cursor):
3839
duckdb_cursor = duckdb.connect()
40+
duckdb_cursor.execute('SET arrow_output_version = 1.5')
3941
decimal_64 = pa.Table.from_pylist(
4042
[
4143
{"data": Decimal("1000.231")},

tests/fast/arrow/test_arrow_offsets.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@ def expected_result(col1_null, col2_null, expected):
7575
return [(col1, col2)]
7676

7777

78-
test_nulls = lambda: mark.parametrize(
78+
null_test_parameters = lambda: mark.parametrize(
7979
['col1_null', 'col2_null'], [(False, True), (True, False), (True, True), (False, False)]
8080
)
8181

8282

8383
class TestArrowOffsets(object):
84-
@test_nulls()
84+
@null_test_parameters()
8585
def test_struct_of_strings(self, duckdb_cursor, col1_null, col2_null):
8686
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
8787
if col1_null:
@@ -105,7 +105,7 @@ def test_struct_of_strings(self, duckdb_cursor, col1_null, col2_null):
105105
).fetchall()
106106
assert res == expected_result(col1_null, col2_null, '131072')
107107

108-
@test_nulls()
108+
@null_test_parameters()
109109
def test_struct_of_bools(self, duckdb_cursor, col1_null, col2_null):
110110
tuples = [False for i in range(0, MAGIC_ARRAY_SIZE)]
111111
tuples[-1] = True
@@ -138,7 +138,7 @@ def test_struct_of_bools(self, duckdb_cursor, col1_null, col2_null):
138138
(pa_date64(), datetime.date(1970, 1, 1)),
139139
],
140140
)
141-
@test_nulls()
141+
@null_test_parameters()
142142
def test_struct_of_dates(self, duckdb_cursor, constructor, expected, col1_null, col2_null):
143143
tuples = [i for i in range(0, MAGIC_ARRAY_SIZE)]
144144

@@ -163,7 +163,7 @@ def test_struct_of_dates(self, duckdb_cursor, constructor, expected, col1_null,
163163
).fetchall()
164164
assert res == expected_result(col1_null, col2_null, expected)
165165

166-
@test_nulls()
166+
@null_test_parameters()
167167
def test_struct_of_enum(self, duckdb_cursor, col1_null, col2_null):
168168
enum_type = pa.dictionary(pa.int64(), pa.utf8())
169169

@@ -190,7 +190,7 @@ def test_struct_of_enum(self, duckdb_cursor, col1_null, col2_null):
190190
).fetchall()
191191
assert res == expected_result(col1_null, col2_null, 'green')
192192

193-
@test_nulls()
193+
@null_test_parameters()
194194
def test_struct_of_blobs(self, duckdb_cursor, col1_null, col2_null):
195195
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
196196
if col1_null:
@@ -214,7 +214,7 @@ def test_struct_of_blobs(self, duckdb_cursor, col1_null, col2_null):
214214
).fetchall()
215215
assert res == expected_result(col1_null, col2_null, b'131072')
216216

217-
@test_nulls()
217+
@null_test_parameters()
218218
@pytest.mark.parametrize(
219219
["constructor", "unit", "expected"],
220220
[
@@ -252,7 +252,7 @@ def test_struct_of_time(self, duckdb_cursor, constructor, unit, expected, col1_n
252252
).fetchall()
253253
assert res == expected_result(col1_null, col2_null, expected)
254254

255-
@test_nulls()
255+
@null_test_parameters()
256256
# NOTE: there is sadly no way to create an 'interval[months]' (tiM) type from pyarrow
257257
@pytest.mark.parametrize(
258258
["constructor", "expected", "converter"],
@@ -287,7 +287,7 @@ def test_struct_of_interval(self, duckdb_cursor, constructor, expected, converte
287287
).fetchall()
288288
assert res == expected_result(col1_null, col2_null, expected)
289289

290-
@test_nulls()
290+
@null_test_parameters()
291291
@pytest.mark.parametrize(
292292
["constructor", "unit", "expected"],
293293
[
@@ -322,7 +322,7 @@ def test_struct_of_duration(self, duckdb_cursor, constructor, unit, expected, co
322322
).fetchall()
323323
assert res == expected_result(col1_null, col2_null, expected)
324324

325-
@test_nulls()
325+
@null_test_parameters()
326326
@pytest.mark.parametrize(
327327
["constructor", "unit", "expected"],
328328
[
@@ -360,7 +360,7 @@ def test_struct_of_timestamp_tz(self, duckdb_cursor, constructor, unit, expected
360360
).fetchall()
361361
assert res == expected_result(col1_null, col2_null, expected)
362362

363-
@test_nulls()
363+
@null_test_parameters()
364364
def test_struct_of_large_blobs(self, duckdb_cursor, col1_null, col2_null):
365365
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
366366
if col1_null:
@@ -384,7 +384,7 @@ def test_struct_of_large_blobs(self, duckdb_cursor, col1_null, col2_null):
384384
).fetchall()
385385
assert res == expected_result(col1_null, col2_null, b'131072')
386386

387-
@test_nulls()
387+
@null_test_parameters()
388388
@pytest.mark.parametrize(
389389
["precision_scale", "expected"],
390390
[
@@ -425,7 +425,7 @@ def test_struct_of_decimal(self, duckdb_cursor, precision_scale, expected, col1_
425425
).fetchall()
426426
assert res == expected_result(col1_null, col2_null, expected)
427427

428-
@test_nulls()
428+
@null_test_parameters()
429429
def test_struct_of_small_list(self, duckdb_cursor, col1_null, col2_null):
430430
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
431431
if col1_null:
@@ -455,7 +455,7 @@ def test_struct_of_small_list(self, duckdb_cursor, col1_null, col2_null):
455455
res2 = ['131072', '131072', '131072']
456456
assert res == [(res1, res2)]
457457

458-
@test_nulls()
458+
@null_test_parameters()
459459
def test_struct_of_fixed_size_list(self, duckdb_cursor, col1_null, col2_null):
460460
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
461461
if col1_null:
@@ -485,7 +485,7 @@ def test_struct_of_fixed_size_list(self, duckdb_cursor, col1_null, col2_null):
485485
res2 = ('131072', '131072', '131072')
486486
assert res == [(res1, res2)]
487487

488-
@test_nulls()
488+
@null_test_parameters()
489489
def test_struct_of_fixed_size_blob(self, duckdb_cursor, col1_null, col2_null):
490490
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
491491
if col1_null:
@@ -516,7 +516,7 @@ def test_struct_of_fixed_size_blob(self, duckdb_cursor, col1_null, col2_null):
516516
res2 = (b'131072', b'131073', b'131074')
517517
assert res == [(res1, res2)]
518518

519-
@test_nulls()
519+
@null_test_parameters()
520520
def test_struct_of_list_of_blobs(self, duckdb_cursor, col1_null, col2_null):
521521
col1 = [str(i) for i in range(0, MAGIC_ARRAY_SIZE)]
522522
if col1_null:
@@ -547,7 +547,7 @@ def test_struct_of_list_of_blobs(self, duckdb_cursor, col1_null, col2_null):
547547
res2 = [b'131072', b'131073', b'131074']
548548
assert res == [(res1, res2)]
549549

550-
@test_nulls()
550+
@null_test_parameters()
551551
def test_struct_of_list_of_list(self, duckdb_cursor, col1_null, col2_null):
552552
col1 = [i for i in range(0, MAGIC_ARRAY_SIZE)]
553553
if col1_null:
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import duckdb
2+
import pytest
3+
from decimal import Decimal
4+
5+
pa = pytest.importorskip("pyarrow")
6+
7+
8+
class TestArrowDecimalTypes(object):
    """Check how ``arrow_output_version`` changes the Arrow types DuckDB exports."""

    @staticmethod
    def _data_type(arrow_table):
        """Return the Arrow type of the ``data`` column of *arrow_table*."""
        return arrow_table.schema.field("data").type

    @staticmethod
    def _assert_decimal128(arrow_table, expected_values):
        """Assert that ``data`` is a 128-bit decimal column holding *expected_values*."""
        column_type = arrow_table.schema.field("data").type
        assert column_type.bit_width == 128
        assert pa.types.is_decimal(column_type)
        assert arrow_table.to_pydict() == {'data': expected_values}

    def test_decimal_v1_5(self, duckdb_cursor):
        """Decimal32/64 keep their width at version 1.5 and come back as 128-bit for 1.0-1.4."""
        # The test runs on its own connection rather than on the fixture it receives.
        con = duckdb.connect()
        con.execute("SET arrow_output_version = 1.5")

        # NOTE: the SQL below names `decimal_32` / `decimal_64` directly and nothing
        # registers them, so they have to stay locals of this function under exactly
        # these names, and the `execute` calls have to be issued from this function
        # (presumably they are resolved by DuckDB's Python replacement scan).
        narrow_values = [Decimal("100.20"), Decimal("110.21"), Decimal("31.20"), Decimal("500.20")]
        decimal_32 = pa.Table.from_pylist(
            [{"data": value} for value in narrow_values],
            pa.schema([("data", pa.decimal32(5, 2))]),
        )
        narrow_type = self._data_type(con.execute("FROM decimal_32").arrow())
        assert narrow_type.bit_width == 32
        assert pa.types.is_decimal(narrow_type)

        wide_inputs = ("1000.231", "1100.231", "999999999999.231", "500.20")
        decimal_64 = pa.Table.from_pylist(
            [{"data": Decimal(text)} for text in wide_inputs],
            pa.schema([("data", pa.decimal64(16, 3))]),
        )
        wide_type = self._data_type(con.execute("FROM decimal_64").arrow())
        assert wide_type.bit_width == 64
        assert pa.types.is_decimal(wide_type)

        wide_expected = [Decimal('1000.231'), Decimal('1100.231'), Decimal('999999999999.231'), Decimal('500.200')]
        for version in ('1.0', '1.1', '1.2', '1.3', '1.4'):
            con.execute(f"SET arrow_output_version = {version}")
            self._assert_decimal128(con.execute("FROM decimal_32").arrow(), narrow_values)
            self._assert_decimal128(con.execute("FROM decimal_64").arrow(), wide_expected)

    def test_invalide_opt(self, duckdb_cursor):
        """An unknown ``arrow_output_version`` value is rejected with NotImplementedException."""
        con = duckdb.connect()
        rejection = pytest.raises(
            duckdb.NotImplementedException,
            match=" Unrecognized parameter for option arrow_output_version",
        )
        with rejection:
            con.execute("SET arrow_output_version = 999.9")

    def test_view_v1_4(self, duckdb_cursor):
        """String-view / list-view output needs both the view flags and version 1.4 or 1.5."""
        con = duckdb.connect()
        for statement in (
            "SET arrow_output_version = 1.5",
            "SET produce_arrow_string_view=True",
            "SET arrow_output_list_view=True",
        ):
            con.execute(statement)

        def assert_views(expected):
            # Query, then assert, one column kind at a time.
            string_type = self._data_type(con.execute("SELECT 'string' as data ").arrow())
            assert pa.types.is_string_view(string_type) == expected
            list_type = self._data_type(con.execute("SELECT ['string'] as data ").arrow())
            assert pa.types.is_list_view(list_type) == expected

        def assert_views_for(versions, expected):
            for version in versions:
                con.execute(f"SET arrow_output_version = {version}")
                assert_views(expected)

        # Flags on, version 1.5: views are produced.
        assert_views(True)
        # Flags on, versions before 1.4: no views.
        assert_views_for(('1.0', '1.1', '1.2', '1.3'), False)
        # Flags on, 1.4 and later: views are produced.
        assert_views_for(('1.4', '1.5'), True)

        # Flags off: no views even on 1.4 / 1.5.
        con.execute("SET produce_arrow_string_view=False")
        con.execute("SET arrow_output_list_view=False")
        assert_views_for(('1.4', '1.5'), False)

tests/fast/arrow/test_filter_pushdown.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -737,7 +737,7 @@ def test_filter_column_removal(self, duckdb_cursor, create_table):
737737
match = re.search("│ +b +│", query_res[0][1])
738738
assert not match
739739

740-
@pytest.mark.skipif(sys.version_info < (3, 9), reason="Requires python 3.9")
740+
@pytest.mark.skipif(sys.version_info <= (3, 9), reason="Requires python 3.9")
741741
@pytest.mark.parametrize('create_table', [create_pyarrow_pandas, create_pyarrow_table])
742742
def test_struct_filter_pushdown(self, duckdb_cursor, create_table):
743743
duckdb_cursor.execute(
@@ -808,7 +808,7 @@ def test_struct_filter_pushdown(self, duckdb_cursor, create_table):
808808
match = re.search(".*ARROW_SCAN.*Filters: s\\.a IS NULL.*", query_res[0][1], flags=re.DOTALL)
809809
assert not match
810810

811-
@pytest.mark.skipif(sys.version_info < (3, 9), reason="Requires python 3.9")
811+
@pytest.mark.skipif(sys.version_info <= (3, 9), reason="Requires python 3.9")
812812
@pytest.mark.parametrize('create_table', [create_pyarrow_pandas, create_pyarrow_table])
813813
def test_nested_struct_filter_pushdown(self, duckdb_cursor, create_table):
814814
duckdb_cursor.execute(
@@ -1013,3 +1013,9 @@ def assert_equal_results(con, arrow_table, query):
10131013
assert_equal_results(duckdb_cursor, arrow_table, "select * from {table} where a <= 'NaN'::FLOAT")
10141014
assert_equal_results(duckdb_cursor, arrow_table, "select * from {table} where a = 'NaN'::FLOAT")
10151015
assert_equal_results(duckdb_cursor, arrow_table, "select * from {table} where a != 'NaN'::FLOAT")
1016+
1017+
def test_dynamic_filter(self, duckdb_cursor):
    """Top-N over a registered Arrow table returns the correct rows, in order.

    ``ORDER BY a LIMIT 11`` is presumably the query shape that makes DuckDB hand
    a dynamic filter to the Arrow scan (TODO confirm against the planner).
    Checking only the row count would let a scan that drops or keeps the wrong
    rows pass, so the exact result is asserted as well.
    """
    values = [3, 24, 234, 234, 234, 234, 234, 234, 234, 45, 2, 5, 2, 45]
    t = pa.Table.from_pydict({"a": values})
    duckdb_cursor.register("t", t)
    res = duckdb_cursor.sql("SELECT a FROM t ORDER BY a LIMIT 11").fetchall()
    assert len(res) == 11
    # The 11 smallest values in ascending order, one single-column tuple per row.
    assert res == [(value,) for value in sorted(values)[:11]]

0 commit comments

Comments
 (0)