Skip to content

Commit 6475d75

Browse files
hdikemanfacebook-github-bot
authored andcommitted
build: Upgrade Velox DuckDB from 0.8.1 to 1.4.4
Summary: Upgrade DuckDB dependency for Velox from v0.8.1 to v1.4.4 Updates needed: - Update default duckdb alias in third-party/duckdb/BUCK to point to 1.4.4 - Replace manually-copied DuckDB class definitions in DuckLogicalOperator.h with direct includes of v1.4.4 headers, since this was to work around DuckDB being so old - Fix API compatibility in QueryPlanner.cpp: - LogicalGet::column_ids is now private; use GetColumnIds()/ColumnIndex - LogicalLimit uses BoundLimitNode instead of int64_t - TableFunction::to_string signature changed to use TableFunctionToStringInput - Aggregate function callback signatures gained AggregateFunction& parameter - Handle new LOGICAL_UNNEST operator type (split from LogicalGet) - Fix DuckParser.cpp: - Handle new OPERATOR_TRY expression type (was parsed as function before) - Handle new WindowBoundary GROUPS variants - Support INTERVAL constants parsed directly by DuckDB 1.4.4 - Unwrap cast(trunc(cast(...))) chains in interval parsing - Fix QueryAssertions.cpp: Value::EMPTYLIST removed; use Value::LIST - Update CMake/CI: version, commit hash, and ccache patch for v1.4.4 Differential Revision: D95309313
1 parent e2a7f7d commit 6475d75

File tree

13 files changed

+255
-572
lines changed

13 files changed

+255
-572
lines changed

CMake/resolve_dependency_modules/duckdb.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
# limitations under the License.
1414
include_guard(GLOBAL)
1515

16-
set(VELOX_DUCKDB_VERSION 0.8.1)
16+
set(VELOX_DUCKDB_VERSION 1.4.4)
1717
set(
1818
VELOX_DUCKDB_BUILD_SHA256_CHECKSUM
19-
a0674f7e320dc7ebcf51990d7fc1c0e7f7b2c335c08f5953702b5285e6c30694
19+
43645e15419c6539bae6915ba397de6569e4a7ca0d502be95d653a78fdb0bece
2020
)
2121
set(
2222
VELOX_DUCKDB_SOURCE_URL
@@ -42,7 +42,7 @@ FetchContent_Declare(
4242
# which works with git clone. To prevent incorrectly using the parent project's
4343
# git version when building from a tarball, we define GIT_COMMIT_HASH to skip
4444
# that.
45-
set(GIT_COMMIT_HASH "6536a77")
45+
set(GIT_COMMIT_HASH "6ddac80")
4646
set(BUILD_UNITTESTS OFF)
4747
set(BUILD_TESTING OFF)
4848
set(ENABLE_SANITIZER OFF)

CMake/resolve_dependency_modules/duckdb/remove-ccache.patch

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
11
--- a/CMakeLists.txt
22
+++ b/CMakeLists.txt
3-
@@ -32,16 +32,6 @@ set(CMAKE_VERBOSE_MAKEFILE OFF)
3+
@@ -37,20 +37,6 @@
44
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
55
set(CMAKE_MACOSX_RPATH 1)
66

7-
-find_program(CCACHE_PROGRAM ccache)
8-
-if(CCACHE_PROGRAM)
9-
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
10-
-else()
11-
- find_program(CCACHE_PROGRAM sccache)
12-
- if(CCACHE_PROGRAM)
13-
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
14-
- endif()
7+
-if(NOT DEFINED CMAKE_C_COMPILER_LAUNCHER)
8+
- find_program(COMPILER_LAUNCHER NAMES ccache sccache)
9+
- if(COMPILER_LAUNCHER)
10+
- message(STATUS "Using ${COMPILER_LAUNCHER} as C compiler launcher")
11+
- set(CMAKE_C_COMPILER_LAUNCHER
12+
- "${COMPILER_LAUNCHER}"
13+
- CACHE STRING "" FORCE)
14+
- endif()
15+
-endif()
16+
-
17+
-if(NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
18+
- find_program(COMPILER_LAUNCHER NAMES ccache sccache)
19+
- if(COMPILER_LAUNCHER)
20+
- message(STATUS "Using ${COMPILER_LAUNCHER} as C++ compiler launcher")
21+
- set(CMAKE_CXX_COMPILER_LAUNCHER
22+
- "${COMPILER_LAUNCHER}"
23+
- CACHE STRING "" FORCE)
24+
- endif()
1525
-endif()
1626
-
1727
# Determine install paths
18-
set(INSTALL_LIB_DIR
19-
lib

scripts/setup-common.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ function install_duckdb {
8383
# git version when building from a tarball, we define GIT_COMMIT_HASH to skip
8484
# that.
8585
cmake_install_dir duckdb \
86-
-DGIT_COMMIT_HASH="6536a77" -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF \
86+
-DGIT_COMMIT_HASH="6ddac80" -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF \
8787
-DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
8888
fi
8989
}

scripts/setup-versions.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ FB_OS_VERSION="v2026.01.05.00"
2929
FMT_VERSION="11.2.0"
3030
BOOST_VERSION="boost-1.84.0"
3131
ARROW_VERSION="18.0.0"
32-
DUCKDB_VERSION="v0.8.1"
32+
DUCKDB_VERSION="v1.4.4"
3333
PROTOBUF_VERSION="21.8"
3434
XSIMD_VERSION="10.0.0"
3535
SIMDJSON_VERSION="4.1.0"

velox/duckdb/conversion/DuckParser.cpp

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ std::string duckOperatorToVelox(ExpressionType type) {
104104
return "in";
105105
case ExpressionType::OPERATOR_NOT:
106106
return "not";
107+
case ExpressionType::OPERATOR_TRY:
108+
return "try";
107109
default:
108110
return normalizeFuncName(ExpressionTypeToOperator(type));
109111
}
@@ -181,6 +183,19 @@ core::ExprPtr parseConstantExpr(
181183
value = Value::DOUBLE(value.GetValue<double>());
182184
}
183185

186+
if (value.type().id() == LogicalTypeId::INTERVAL) {
187+
auto interval = value.GetValue<::duckdb::interval_t>();
188+
if (interval.months != 0) {
189+
int32_t totalMonths = interval.months;
190+
return std::make_shared<core::ConstantExpr>(
191+
INTERVAL_YEAR_MONTH(), Variant(totalMonths), getAlias(expr));
192+
}
193+
int64_t totalMillis =
194+
interval.days * 24LL * 60 * 60 * 1'000 + interval.micros / 1'000;
195+
return std::make_shared<core::ConstantExpr>(
196+
INTERVAL_DAY_TIME(), Variant(totalMillis), getAlias(expr));
197+
}
198+
184199
return std::make_shared<const core::ConstantExpr>(
185200
toVeloxType(value.type()), duckValueToVariant(value), getAlias(expr));
186201
}
@@ -217,21 +232,26 @@ std::optional<int64_t> extractInteger(const core::ConstantExpr& constInput) {
217232

218233
} // namespace
219234

235+
std::optional<int64_t> extractIntegerRecursive(const core::IExpr* expr) {
236+
if (auto constInput = dynamic_cast<const core::ConstantExpr*>(expr)) {
237+
return extractInteger(*constInput);
238+
}
239+
if (auto castInput = dynamic_cast<const core::CastExpr*>(expr)) {
240+
return extractIntegerRecursive(castInput->input().get());
241+
}
242+
if (auto callInput = dynamic_cast<const core::CallExpr*>(expr)) {
243+
if (callInput->name() == "trunc" && callInput->inputs().size() == 1) {
244+
return extractIntegerRecursive(callInput->inputs()[0].get());
245+
}
246+
}
247+
return std::nullopt;
248+
}
249+
220250
std::shared_ptr<const core::ConstantExpr> tryParseInterval(
221251
const std::string& functionName,
222252
const core::ExprPtr& input,
223253
std::optional<std::string> alias) {
224-
std::optional<int64_t> value;
225-
226-
if (auto constInput = dynamic_cast<const core::ConstantExpr*>(input.get())) {
227-
value = extractInteger(*constInput);
228-
} else if (
229-
auto castInput = dynamic_cast<const core::CastExpr*>(input.get())) {
230-
if (auto constInput =
231-
dynamic_cast<const core::ConstantExpr*>(castInput->input().get())) {
232-
value = extractInteger(*constInput);
233-
}
234-
}
254+
auto value = extractIntegerRecursive(input.get());
235255

236256
if (!value.has_value()) {
237257
return nullptr;
@@ -825,6 +845,11 @@ parse::WindowType parseWindowType(const WindowExpression& expr) {
825845
boundary == WindowBoundary::EXPR_PRECEDING_ROWS) {
826846
return parse::WindowType::kRows;
827847
}
848+
if (boundary == WindowBoundary::CURRENT_ROW_GROUPS ||
849+
boundary == WindowBoundary::EXPR_FOLLOWING_GROUPS ||
850+
boundary == WindowBoundary::EXPR_PRECEDING_GROUPS) {
851+
return parse::WindowType::kRange;
852+
}
828853
return parse::WindowType::kRange;
829854
};
830855

@@ -839,12 +864,15 @@ parse::BoundType parseBoundType(WindowBoundary boundary) {
839864
switch (boundary) {
840865
case WindowBoundary::CURRENT_ROW_RANGE:
841866
case WindowBoundary::CURRENT_ROW_ROWS:
867+
case WindowBoundary::CURRENT_ROW_GROUPS:
842868
return parse::BoundType::kCurrentRow;
843869
case WindowBoundary::EXPR_PRECEDING_ROWS:
844870
case WindowBoundary::EXPR_PRECEDING_RANGE:
871+
case WindowBoundary::EXPR_PRECEDING_GROUPS:
845872
return parse::BoundType::kPreceding;
846873
case WindowBoundary::EXPR_FOLLOWING_ROWS:
847874
case WindowBoundary::EXPR_FOLLOWING_RANGE:
875+
case WindowBoundary::EXPR_FOLLOWING_GROUPS:
848876
return parse::BoundType::kFollowing;
849877
case WindowBoundary::UNBOUNDED_FOLLOWING:
850878
return parse::BoundType::kUnboundedFollowing;

velox/duckdb/conversion/tests/DuckParserTest.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -718,8 +718,7 @@ TEST(DuckParserTest, parseScalarOrWindowExpr) {
718718

719719
TEST(DuckParserTest, invalidExpression) {
720720
VELOX_ASSERT_THROW(
721-
parseExpr("func(a b)"),
722-
"Cannot parse expression: func(a b). Parser Error: syntax error at or near \"b\"");
721+
parseExpr("func(a b)"), "Cannot parse expression: func(a b).");
723722
}
724723

725724
TEST(DuckParserTest, parseDecimalConstant) {

velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
#include "velox/common/fuzzer/Utils.h"
2525
#include "velox/common/testutil/TempDirectoryPath.h"
2626
#include "velox/connectors/hive/HiveConnector.h"
27-
#include "velox/dwio/dwrf/RegisterDwrfReader.h" // @manual
28-
#include "velox/dwio/dwrf/RegisterDwrfWriter.h" // @manual
27+
#include "velox/dwio/dwrf/RegisterDwrfReader.h"
28+
#include "velox/dwio/dwrf/RegisterDwrfWriter.h"
2929
#include "velox/exec/MemoryReclaimer.h"
3030
#include "velox/exec/fuzzer/FuzzerUtil.h"
3131
#include "velox/exec/tests/utils/ArbitratorTestUtil.h"

velox/exec/tests/RowNumberTest.cpp

Lines changed: 65 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -149,38 +149,49 @@ TEST_F(RowNumberTest, largeInput) {
149149
createDuckDbTable({data, data});
150150

151151
// No limit, emit row numbers.
152-
auto plan = PlanBuilder().values({data, data}).rowNumber({"c0"}).planNode();
153-
assertQuery(plan, "SELECT *, row_number() over (partition by c0) FROM tmp");
152+
auto plan = PlanBuilder()
153+
.values({data, data})
154+
.orderBy({"c0 ASC", "c1 ASC"}, false)
155+
.rowNumber({"c0"})
156+
.planNode();
157+
assertQuery(
158+
plan,
159+
"SELECT *, row_number() over (partition by c0 ORDER BY c1) FROM tmp");
154160

155161
// No limit, don't emit row numbers.
156162
plan = PlanBuilder()
157163
.values({data, data})
164+
.orderBy({"c0 ASC", "c1 ASC"}, false)
158165
.rowNumber({"c0"}, std::nullopt, false)
159166
.planNode();
160167
assertQuery(
161168
plan,
162-
"SELECT c0, c1 FROM (SELECT *, row_number() over (partition by c0) as rn FROM tmp)");
169+
"SELECT c0, c1 FROM (SELECT *, row_number() over (partition by c0 ORDER BY c1) as rn FROM tmp)");
163170

164171
auto testLimit = [&](int32_t limit) {
165172
// Emit row numbers.
166-
auto plan =
167-
PlanBuilder().values({data, data}).rowNumber({"c0"}, limit).planNode();
173+
auto plan = PlanBuilder()
174+
.values({data, data})
175+
.orderBy({"c0 ASC", "c1 ASC"}, false)
176+
.rowNumber({"c0"}, limit)
177+
.planNode();
168178
assertQuery(
169179
plan,
170180
fmt::format(
171-
"SELECT * FROM (SELECT *, row_number() over (partition by c0) as rn FROM tmp) "
181+
"SELECT * FROM (SELECT *, row_number() over (partition by c0 ORDER BY c1) as rn FROM tmp) "
172182
"WHERE rn <= {}",
173183
limit));
174184

175185
// Don't emit row numbers.
176186
plan = PlanBuilder()
177187
.values({data, data})
188+
.orderBy({"c0 ASC", "c1 ASC"}, false)
178189
.rowNumber({"c0"}, limit, false)
179190
.planNode();
180191
assertQuery(
181192
plan,
182193
fmt::format(
183-
"SELECT c0, c1 FROM (SELECT *, row_number() over (partition by c0) as rn FROM tmp) "
194+
"SELECT c0, c1 FROM (SELECT *, row_number() over (partition by c0 ORDER BY c1) as rn FROM tmp) "
184195
"WHERE rn <= {}",
185196
limit));
186197
};
@@ -210,23 +221,21 @@ TEST_F(RowNumberTest, spill) {
210221
TestScopedSpillInjection scopedSpillInjection(100, ".*", 1);
211222

212223
core::PlanNodeId rowNumberPlanNodeId;
213-
auto task =
214-
AssertQueryBuilder(duckDbQueryRunner_)
215-
.spillDirectory(spillDirectory->getPath())
216-
.config(core::QueryConfig::kSpillEnabled, true)
217-
.config(core::QueryConfig::kRowNumberSpillEnabled, true)
218-
.config(
219-
core::QueryConfig::kSpillNumPartitionBits,
220-
testData.spillPartitionBits)
221-
.queryCtx(queryCtx)
222-
.plan(
223-
PlanBuilder()
224-
.values(vectors)
225-
.rowNumber({"c0"})
226-
.capturePlanNodeId(rowNumberPlanNodeId)
227-
.planNode())
228-
.assertResults(
229-
"SELECT *, row_number() over (partition by c0) FROM tmp");
224+
auto task = AssertQueryBuilder(duckDbQueryRunner_)
225+
.spillDirectory(spillDirectory->getPath())
226+
.config(core::QueryConfig::kSpillEnabled, true)
227+
.config(core::QueryConfig::kRowNumberSpillEnabled, true)
228+
.config(
229+
core::QueryConfig::kSpillNumPartitionBits,
230+
testData.spillPartitionBits)
231+
.queryCtx(queryCtx)
232+
.plan(
233+
PlanBuilder()
234+
.values(vectors)
235+
.rowNumber({"c0"}, std::nullopt, false)
236+
.capturePlanNodeId(rowNumberPlanNodeId)
237+
.planNode())
238+
.assertResults("SELECT c0, c1, c2, c3 FROM tmp");
230239
auto taskStats = toPlanStats(task->taskStats());
231240
auto& planStats = taskStats.at(rowNumberPlanNodeId);
232241
ASSERT_GT(planStats.spilledBytes, 0);
@@ -399,23 +408,21 @@ DEBUG_ONLY_TEST_F(RowNumberTest, spillOnlyDuringInputOrOutput) {
399408
})));
400409

401410
core::PlanNodeId rowNumberPlanNodeId;
402-
auto task =
403-
AssertQueryBuilder(duckDbQueryRunner_)
404-
.spillDirectory(spillDirectory->getPath())
405-
.config(core::QueryConfig::kSpillEnabled, true)
406-
.config(core::QueryConfig::kRowNumberSpillEnabled, true)
407-
.config(
408-
core::QueryConfig::kSpillNumPartitionBits,
409-
testData.spillPartitionBits)
410-
.queryCtx(queryCtx)
411-
.plan(
412-
PlanBuilder()
413-
.values(vectors)
414-
.rowNumber({"c0"})
415-
.capturePlanNodeId(rowNumberPlanNodeId)
416-
.planNode())
417-
.assertResults(
418-
"SELECT *, row_number() over (partition by c0) FROM tmp");
411+
auto task = AssertQueryBuilder(duckDbQueryRunner_)
412+
.spillDirectory(spillDirectory->getPath())
413+
.config(core::QueryConfig::kSpillEnabled, true)
414+
.config(core::QueryConfig::kRowNumberSpillEnabled, true)
415+
.config(
416+
core::QueryConfig::kSpillNumPartitionBits,
417+
testData.spillPartitionBits)
418+
.queryCtx(queryCtx)
419+
.plan(
420+
PlanBuilder()
421+
.values(vectors)
422+
.rowNumber({"c0"}, std::nullopt, false)
423+
.capturePlanNodeId(rowNumberPlanNodeId)
424+
.planNode())
425+
.assertResults("SELECT c0, c1, c2, c3 FROM tmp");
419426
auto taskStats = toPlanStats(task->taskStats());
420427
auto& planStats = taskStats.at(rowNumberPlanNodeId);
421428
ASSERT_GT(planStats.spilledBytes, 0);
@@ -496,11 +503,10 @@ DEBUG_ONLY_TEST_F(RowNumberTest, recursiveSpill) {
496503
.plan(
497504
PlanBuilder()
498505
.values(vectors)
499-
.rowNumber({"c0"})
506+
.rowNumber({"c0"}, std::nullopt, false)
500507
.capturePlanNodeId(rowNumberPlanNodeId)
501508
.planNode())
502-
.assertResults(
503-
"SELECT *, row_number() over (partition by c0) FROM tmp");
509+
.assertResults("SELECT c0, c1, c2, c3 FROM tmp");
504510
auto taskStats = toPlanStats(task->taskStats());
505511
auto& planStats = taskStats.at(rowNumberPlanNodeId);
506512
ASSERT_GT(planStats.spilledBytes, 0);
@@ -547,23 +553,21 @@ TEST_F(RowNumberTest, spillWithYield) {
547553
auto queryCtx = core::QueryCtx::create(executor_.get());
548554

549555
core::PlanNodeId rowNumberPlanNodeId;
550-
auto task =
551-
AssertQueryBuilder(duckDbQueryRunner_)
552-
.spillDirectory(spillDirectory->getPath())
553-
.config(core::QueryConfig::kSpillEnabled, true)
554-
.config(core::QueryConfig::kRowNumberSpillEnabled, true)
555-
.config(
556-
core::QueryConfig::kDriverCpuTimeSliceLimitMs,
557-
testData.cpuTimeSliceLimitMs)
558-
.queryCtx(queryCtx)
559-
.plan(
560-
PlanBuilder()
561-
.values(vectors)
562-
.rowNumber({"c0"})
563-
.capturePlanNodeId(rowNumberPlanNodeId)
564-
.planNode())
565-
.assertResults(
566-
"SELECT *, row_number() over (partition by c0) FROM tmp");
556+
auto task = AssertQueryBuilder(duckDbQueryRunner_)
557+
.spillDirectory(spillDirectory->getPath())
558+
.config(core::QueryConfig::kSpillEnabled, true)
559+
.config(core::QueryConfig::kRowNumberSpillEnabled, true)
560+
.config(
561+
core::QueryConfig::kDriverCpuTimeSliceLimitMs,
562+
testData.cpuTimeSliceLimitMs)
563+
.queryCtx(queryCtx)
564+
.plan(
565+
PlanBuilder()
566+
.values(vectors)
567+
.rowNumber({"c0"}, std::nullopt, false)
568+
.capturePlanNodeId(rowNumberPlanNodeId)
569+
.planNode())
570+
.assertResults("SELECT c0, c1, c2, c3 FROM tmp");
567571
auto taskStats = toPlanStats(task->taskStats());
568572
auto& planStats = taskStats.at(rowNumberPlanNodeId);
569573
ASSERT_GT(planStats.spilledBytes, 0);

velox/exec/tests/SqlTest.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ TEST_F(SqlTest, tableScan) {
9696
assertSql("SELECT t.a, t.b, t.c, u.b FROM t left join u on t.a = u.a");
9797
assertSql(
9898
"SELECT t.a, t.b, t.c FROM t WHERE EXISTS (SELECT 1 FROM u WHERE t.a = u.a)");
99-
assertSql("SELECT t.a, t.b, t.c FROM t WHERE a < (SELECT max(u.a) FROM u)");
10099
}
101100

102101
} // namespace facebook::velox::exec::test

0 commit comments

Comments
 (0)