Skip to content

Commit de633ce

Browse files
committed
fix: add support for projection
1 parent d86a334 commit de633ce

File tree

2 files changed

+182
-4
lines changed

2 files changed

+182
-4
lines changed

src/adbc_scan.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,13 @@ static unique_ptr<LocalTableFunctionState> AdbcScanInitLocal(ExecutionContext &c
330330
GlobalTableFunctionState *global_state_p) {
331331
auto current_chunk = make_uniq<ArrowArrayWrapper>();
332332
auto local_state = make_uniq<AdbcScanLocalState>(std::move(current_chunk), context.client);
333-
// Don't populate column_ids - we return all columns and let DuckDB project
333+
334+
// Populate column_ids for projection pushdown
335+
// ArrowToDuckDB uses these to map output column indices to Arrow array child indices
336+
for (auto &col_id : input.column_ids) {
337+
local_state->column_ids.push_back(col_id);
338+
}
339+
334340
return std::move(local_state);
335341
}
336342

@@ -393,11 +399,13 @@ static void AdbcScanFunction(ClientContext &context, TableFunctionInput &data, D
393399
output.SetCardinality(output_size);
394400

395401
// Convert Arrow data to DuckDB using ArrowTableFunction::ArrowToDuckDB
402+
// arrow_scan_is_projected = false because the ADBC driver returns all columns,
403+
// but ArrowToDuckDB will use local_state.column_ids to extract only the needed columns
396404
if (output_size > 0) {
397405
ArrowTableFunction::ArrowToDuckDB(local_state,
398406
bind_data.arrow_table.GetColumns(),
399407
output,
400-
false); // arrow_scan_is_projected = false (no projection pushdown)
408+
false);
401409
}
402410

403411
local_state.chunk_offset += output.size();
@@ -473,8 +481,9 @@ void RegisterAdbcTableFunctions(DatabaseInstance &db) {
473481
// Add named parameter for batch size hint (driver-specific, best-effort)
474482
adbc_scan_function.named_parameters["batch_size"] = LogicalType::BIGINT;
475483

476-
// Disable projection pushdown - we always return all columns from the ADBC query
477-
adbc_scan_function.projection_pushdown = false;
484+
// Enable projection pushdown - DuckDB will request only the columns it needs
485+
// The ADBC driver still returns all columns, but ArrowToDuckDB will extract only the needed ones
486+
adbc_scan_function.projection_pushdown = true;
478487

479488
// Add progress, cardinality, and to_string callbacks
480489
adbc_scan_function.table_scan_progress = AdbcScanProgress;

test/sql/adbc.test

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,3 +431,172 @@ statement error
431431
SELECT * FROM adbc_scan(getvariable('conn_id')::BIGINT, 'SELECT 1');
432432
----
433433
Invalid Input Error: adbc_scan: Invalid connection handle
434+
435+
# ============================================
436+
# Projection Pushdown Tests
437+
# ============================================
438+
439+
# Create a connection for projection tests
440+
statement ok
441+
SET VARIABLE proj_conn = (SELECT adbc_connect({'driver': 'sqlite', 'uri': ':memory:'}));
442+
443+
# Create a table with 30 columns of various types
444+
statement ok
445+
SELECT adbc_execute(getvariable('proj_conn')::BIGINT, '
446+
CREATE TABLE wide_table (
447+
col00 INTEGER, col01 TEXT, col02 REAL, col03 INTEGER, col04 TEXT,
448+
col05 REAL, col06 INTEGER, col07 TEXT, col08 REAL, col09 INTEGER,
449+
col10 TEXT, col11 REAL, col12 INTEGER, col13 TEXT, col14 REAL,
450+
col15 INTEGER, col16 TEXT, col17 REAL, col18 INTEGER, col19 TEXT,
451+
col20 REAL, col21 INTEGER, col22 TEXT, col23 REAL, col24 INTEGER,
452+
col25 TEXT, col26 REAL, col27 INTEGER, col28 TEXT, col29 REAL
453+
)
454+
');
455+
456+
# Insert test data (3 rows)
457+
statement ok
458+
SELECT adbc_execute(getvariable('proj_conn')::BIGINT, '
459+
INSERT INTO wide_table VALUES
460+
(0, ''a0'', 0.0, 3, ''a3'', 5.5, 6, ''a6'', 8.8, 9, ''a10'', 11.11, 12, ''a13'', 14.14, 15, ''a16'', 17.17, 18, ''a19'', 20.20, 21, ''a22'', 23.23, 24, ''a25'', 26.26, 27, ''a28'', 29.29),
461+
(100, ''b0'', 100.0, 103, ''b3'', 105.5, 106, ''b6'', 108.8, 109, ''b10'', 111.11, 112, ''b13'', 114.14, 115, ''b16'', 117.17, 118, ''b19'', 120.20, 121, ''b22'', 123.23, 124, ''b25'', 126.26, 127, ''b28'', 129.29),
462+
(200, ''c0'', 200.0, 203, ''c3'', 205.5, 206, ''c6'', 208.8, 209, ''c10'', 211.11, 212, ''c13'', 214.14, 215, ''c16'', 217.17, 218, ''c19'', 220.20, 221, ''c22'', 223.23, 224, ''c25'', 226.26, 227, ''c28'', 229.29)
463+
');
464+
465+
# Test 1: Select 5 randomly spaced columns (col02, col07, col15, col22, col28)
466+
query RTITT
467+
SELECT col02, col07, col15, col22, col28 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col02;
468+
----
469+
0.0 a6 15 a22 a28
470+
100.0 b6 115 b22 b28
471+
200.0 c6 215 c22 c28
472+
473+
# Test 2: Select columns in non-sequential order (col28, col03, col19, col10, col00)
474+
query TITII
475+
SELECT col28, col03, col19, col10, col00 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col00;
476+
----
477+
a28 3 a19 a10 0
478+
b28 103 b19 b10 100
479+
c28 203 c19 c10 200
480+
481+
# Test 3: Select only first column
482+
query I
483+
SELECT col00 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col00;
484+
----
485+
0
486+
100
487+
200
488+
489+
# Test 4: Select only last column
490+
query R
491+
SELECT col29 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col29;
492+
----
493+
29.29
494+
129.29
495+
229.29
496+
497+
# Test 5: Select first and last columns
498+
query IR
499+
SELECT col00, col29 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col00;
500+
----
501+
0 29.29
502+
100 129.29
503+
200 229.29
504+
505+
# Test 6: Count rows of the wide table (COUNT(*) references no table column; verifies the row count)
506+
query I
507+
SELECT COUNT(*) FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table');
508+
----
509+
3
510+
511+
# Test 7: Aggregation with projection (only needs one column)
512+
query IR
513+
SELECT COUNT(*), SUM(col00) FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table');
514+
----
515+
3 300
516+
517+
# Test 8: Projection with WHERE clause (filter uses projected column)
518+
query IT
519+
SELECT col00, col01 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') WHERE col00 > 50 ORDER BY col00;
520+
----
521+
100 b0
522+
200 c0
523+
524+
# Test 9: Projection with expressions
525+
query IIR
526+
SELECT col00, col00 * 2 AS doubled, col02 + col05 AS sum_reals FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col00;
527+
----
528+
0 0 5.5
529+
100 200 205.5
530+
200 400 405.5
531+
532+
# Test 10: Select same column multiple times (edge case)
533+
query III
534+
SELECT col00, col00, col00 FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM wide_table') ORDER BY col00;
535+
----
536+
0 0 0
537+
100 100 100
538+
200 200 200
539+
540+
# Test 11: Large number of rows with projection (test batching)
541+
statement ok
542+
SELECT adbc_execute(getvariable('proj_conn')::BIGINT, 'CREATE TABLE large_wide (c0 INT, c1 INT, c2 INT, c3 INT, c4 INT, c5 INT, c6 INT, c7 INT, c8 INT, c9 INT)');
543+
544+
statement ok
545+
SELECT adbc_execute(getvariable('proj_conn')::BIGINT, '
546+
WITH RECURSIVE cnt(n) AS (
547+
SELECT 0
548+
UNION ALL
549+
SELECT n + 1 FROM cnt WHERE n < 9999
550+
)
551+
INSERT INTO large_wide SELECT n, n+1, n+2, n+3, n+4, n+5, n+6, n+7, n+8, n+9 FROM cnt
552+
');
553+
554+
# Select only 2 columns from 10, with 10000 rows
555+
query II
556+
SELECT SUM(c0), SUM(c9) FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM large_wide');
557+
----
558+
49995000 50085000
559+
560+
# Select columns at various positions
561+
query III
562+
SELECT SUM(c1), SUM(c5), SUM(c8) FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM large_wide');
563+
----
564+
50005000 50045000 50075000
565+
566+
# Test 12: Projection with NULL values
567+
statement ok
568+
SELECT adbc_execute(getvariable('proj_conn')::BIGINT, 'CREATE TABLE null_test (a INT, b TEXT, c REAL, d INT, e TEXT)');
569+
570+
statement ok
571+
SELECT adbc_execute(getvariable('proj_conn')::BIGINT, '
572+
INSERT INTO null_test VALUES
573+
(1, NULL, 1.1, NULL, ''e1''),
574+
(NULL, ''b2'', NULL, 4, NULL),
575+
(3, ''b3'', 3.3, 6, ''e3'')
576+
');
577+
578+
# Select columns with NULLs at various positions
579+
query ITI
580+
SELECT a, b, d FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM null_test') ORDER BY a NULLS LAST;
581+
----
582+
1 NULL NULL
583+
3 b3 6
584+
NULL b2 4
585+
586+
# Select only columns that have NULLs
587+
query TR
588+
SELECT b, c FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM null_test') ORDER BY c NULLS LAST;
589+
----
590+
NULL 1.1
591+
b3 3.3
592+
b2 NULL
593+
594+
# Test 13: Count with no columns (just counting rows)
595+
query I
596+
SELECT COUNT(*) FROM adbc_scan(getvariable('proj_conn')::BIGINT, 'SELECT * FROM null_test');
597+
----
598+
3
599+
600+
# Clean up projection connection
601+
statement ok
602+
SELECT adbc_disconnect(getvariable('proj_conn')::BIGINT);

0 commit comments

Comments
 (0)