Skip to content

Commit bafa9de

Browse files
hdikeman authored and facebook-github-bot committed
build: Upgrade Velox DuckDB from 0.8.1 to 1.4.4 (facebookincubator#16650)
Summary: Upgrade the DuckDB dependency for Velox from v0.8.1 to v1.4.4.

Updates needed:
- Update default duckdb alias in third-party/duckdb/BUCK to point to 1.4.4
- Replace manually-copied DuckDB class definitions in DuckLogicalOperator.h with direct includes of v1.4.4 headers, since the copies were only a workaround for DuckDB being so old
- Fix API compatibility in QueryPlanner.cpp:
  - LogicalGet::column_ids is now private; use GetColumnIds()/ColumnIndex
  - LogicalLimit uses BoundLimitNode instead of int64_t
  - TableFunction::to_string signature changed to use TableFunctionToStringInput
  - Aggregate function callback signatures gained AggregateFunction& parameter
  - Handle new LOGICAL_UNNEST operator type (split from LogicalGet)
- Fix DuckParser.cpp:
  - Handle new OPERATOR_TRY expression type (was parsed as function before)
  - Handle new WindowBoundary GROUPS variants
  - Support INTERVAL constants parsed directly by DuckDB 1.4.4
  - Unwrap cast(trunc(cast(...))) chains in interval parsing
- Fix QueryAssertions.cpp: Value::EMPTYLIST removed; use Value::LIST
- Update CMake/CI: version, commit hash, and ccache patch for v1.4.4

Differential Revision: D95309313
1 parent e84a676 commit bafa9de

File tree

17 files changed

+309
-573
lines changed

17 files changed

+309
-573
lines changed

.github/workflows/linux-build-base.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ jobs:
9090
VELOX_DEPENDENCY_SOURCE: SYSTEM
9191
cudf_SOURCE: BUNDLED
9292
CUDA_VERSION: '12.9'
93+
DuckDB_SOURCE: BUNDLED
9394
faiss_SOURCE: BUNDLED
9495
USE_CLANG: "${{ inputs.use-clang && 'true' || 'false' }}"
9596
steps:
@@ -384,6 +385,7 @@ jobs:
384385
- name: Make Debug Build
385386
env:
386387
VELOX_DEPENDENCY_SOURCE: SYSTEM
388+
DuckDB_SOURCE: BUNDLED
387389
faiss_SOURCE: BUNDLED
388390
fmt_SOURCE: BUNDLED
389391
simdjson_SOURCE: BUNDLED

.github/workflows/scheduled.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ jobs:
216216
217217
- name: Build
218218
env:
219+
DuckDB_SOURCE: BUNDLED
219220
EXTRA_CMAKE_FLAGS: >
220221
-DVELOX_ENABLE_ARROW=ON
221222
-DVELOX_ENABLE_GEO=ON

CMake/resolve_dependency_modules/duckdb.cmake

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
# limitations under the License.
1414
include_guard(GLOBAL)
1515

16-
set(VELOX_DUCKDB_VERSION 0.8.1)
16+
set(VELOX_DUCKDB_VERSION 1.4.4)
1717
set(
1818
VELOX_DUCKDB_BUILD_SHA256_CHECKSUM
19-
a0674f7e320dc7ebcf51990d7fc1c0e7f7b2c335c08f5953702b5285e6c30694
19+
43645e15419c6539bae6915ba397de6569e4a7ca0d502be95d653a78fdb0bece
2020
)
2121
set(
2222
VELOX_DUCKDB_SOURCE_URL
@@ -35,14 +35,15 @@ FetchContent_Declare(
3535
URL_HASH ${VELOX_DUCKDB_BUILD_SHA256_CHECKSUM}
3636
PATCH_COMMAND
3737
git apply ${CMAKE_CURRENT_LIST_DIR}/duckdb/remove-ccache.patch && git apply
38-
${CMAKE_CURRENT_LIST_DIR}/duckdb/re2.patch
38+
${CMAKE_CURRENT_LIST_DIR}/duckdb/re2.patch && git apply
39+
${CMAKE_CURRENT_LIST_DIR}/duckdb/zstd.patch
3940
)
4041

4142
# DuckDB uses git commands to retrieve version information during the build,
4243
# which works with git clone. To prevent incorrectly using the parent project's
4344
# git version when building from a tarball, we define GIT_COMMIT_HASH to skip
4445
# that.
45-
set(GIT_COMMIT_HASH "6536a77")
46+
set(GIT_COMMIT_HASH "6ddac80")
4647
set(BUILD_UNITTESTS OFF)
4748
set(BUILD_TESTING OFF)
4849
set(ENABLE_SANITIZER OFF)

CMake/resolve_dependency_modules/duckdb/remove-ccache.patch

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
11
--- a/CMakeLists.txt
22
+++ b/CMakeLists.txt
3-
@@ -32,16 +32,6 @@ set(CMAKE_VERBOSE_MAKEFILE OFF)
3+
@@ -37,24 +37,4 @@
44
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
55
set(CMAKE_MACOSX_RPATH 1)
66

7-
-find_program(CCACHE_PROGRAM ccache)
8-
-if(CCACHE_PROGRAM)
9-
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
10-
-else()
11-
- find_program(CCACHE_PROGRAM sccache)
12-
- if(CCACHE_PROGRAM)
13-
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
14-
- endif()
7+
-if(NOT DEFINED CMAKE_C_COMPILER_LAUNCHER)
8+
- find_program(COMPILER_LAUNCHER NAMES ccache sccache)
9+
- if(COMPILER_LAUNCHER)
10+
- message(STATUS "Using ${COMPILER_LAUNCHER} as C compiler launcher")
11+
- set(CMAKE_C_COMPILER_LAUNCHER
12+
- "${COMPILER_LAUNCHER}"
13+
- CACHE STRING "" FORCE)
14+
- endif()
15+
-endif()
16+
-
17+
-if(NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
18+
- find_program(COMPILER_LAUNCHER NAMES ccache sccache)
19+
- if(COMPILER_LAUNCHER)
20+
- message(STATUS "Using ${COMPILER_LAUNCHER} as C++ compiler launcher")
21+
- set(CMAKE_CXX_COMPILER_LAUNCHER
22+
- "${COMPILER_LAUNCHER}"
23+
- CACHE STRING "" FORCE)
24+
- endif()
1525
-endif()
1626
-
1727
# Determine install paths
18-
set(INSTALL_LIB_DIR
19-
lib

CMake/resolve_dependency_modules/duckdb/zstd.patch

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
--- a/third_party/zstd/CMakeLists.txt
2+
+++ b/third_party/zstd/CMakeLists.txt
3+
@@ -40,7 +40,7 @@ add_library(duckdb_zstd STATIC ${ZSTD_FILES})
4+
5+
target_include_directories(
6+
duckdb_zstd
7+
- PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
8+
+ PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
9+
set_target_properties(duckdb_zstd PROPERTIES EXPORT_NAME duckdb_duckdb_zstd)
10+
11+
install(TARGETS duckdb_zstd

scripts/setup-common.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ function install_duckdb {
9090
# git version when building from a tarball, we define GIT_COMMIT_HASH to skip
9191
# that.
9292
cmake_install_dir duckdb \
93-
-DGIT_COMMIT_HASH="6536a77" -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF \
93+
-DGIT_COMMIT_HASH="6ddac80" -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF \
9494
-DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
9595
fi
9696
}

scripts/setup-versions.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ FB_OS_VERSION="v2026.01.05.00"
2929
FMT_VERSION="11.2.0"
3030
BOOST_VERSION="boost-1.84.0"
3131
ARROW_VERSION="18.0.0"
32-
DUCKDB_VERSION="v0.8.1"
32+
DUCKDB_VERSION="v1.4.4"
3333
PROTOBUF_VERSION="21.8"
3434
XSIMD_VERSION="10.0.0"
3535
SIMDJSON_VERSION="4.1.0"

velox/duckdb/conversion/DuckParser.cpp

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ std::string duckOperatorToVelox(ExpressionType type) {
104104
return "in";
105105
case ExpressionType::OPERATOR_NOT:
106106
return "not";
107+
case ExpressionType::OPERATOR_TRY:
108+
return "try";
107109
default:
108110
return normalizeFuncName(ExpressionTypeToOperator(type));
109111
}
@@ -181,6 +183,19 @@ core::ExprPtr parseConstantExpr(
181183
value = Value::DOUBLE(value.GetValue<double>());
182184
}
183185

186+
if (value.type().id() == LogicalTypeId::INTERVAL) {
187+
auto interval = value.GetValue<::duckdb::interval_t>();
188+
if (interval.months != 0) {
189+
int32_t totalMonths = interval.months;
190+
return std::make_shared<core::ConstantExpr>(
191+
INTERVAL_YEAR_MONTH(), Variant(totalMonths), getAlias(expr));
192+
}
193+
int64_t totalMillis =
194+
interval.days * 24LL * 60 * 60 * 1'000 + interval.micros / 1'000;
195+
return std::make_shared<core::ConstantExpr>(
196+
INTERVAL_DAY_TIME(), Variant(totalMillis), getAlias(expr));
197+
}
198+
184199
return std::make_shared<const core::ConstantExpr>(
185200
toVeloxType(value.type()), duckValueToVariant(value), getAlias(expr));
186201
}
@@ -217,21 +232,26 @@ std::optional<int64_t> extractInteger(const core::ConstantExpr& constInput) {
217232

218233
} // namespace
219234

235+
std::optional<int64_t> extractIntegerRecursive(const core::IExpr* expr) {
236+
if (auto constInput = dynamic_cast<const core::ConstantExpr*>(expr)) {
237+
return extractInteger(*constInput);
238+
}
239+
if (auto castInput = dynamic_cast<const core::CastExpr*>(expr)) {
240+
return extractIntegerRecursive(castInput->input().get());
241+
}
242+
if (auto callInput = dynamic_cast<const core::CallExpr*>(expr)) {
243+
if (callInput->name() == "trunc" && callInput->inputs().size() == 1) {
244+
return extractIntegerRecursive(callInput->inputs()[0].get());
245+
}
246+
}
247+
return std::nullopt;
248+
}
249+
220250
std::shared_ptr<const core::ConstantExpr> tryParseInterval(
221251
const std::string& functionName,
222252
const core::ExprPtr& input,
223253
std::optional<std::string> alias) {
224-
std::optional<int64_t> value;
225-
226-
if (auto constInput = dynamic_cast<const core::ConstantExpr*>(input.get())) {
227-
value = extractInteger(*constInput);
228-
} else if (
229-
auto castInput = dynamic_cast<const core::CastExpr*>(input.get())) {
230-
if (auto constInput =
231-
dynamic_cast<const core::ConstantExpr*>(castInput->input().get())) {
232-
value = extractInteger(*constInput);
233-
}
234-
}
254+
auto value = extractIntegerRecursive(input.get());
235255

236256
if (!value.has_value()) {
237257
return nullptr;
@@ -825,6 +845,11 @@ parse::WindowType parseWindowType(const WindowExpression& expr) {
825845
boundary == WindowBoundary::EXPR_PRECEDING_ROWS) {
826846
return parse::WindowType::kRows;
827847
}
848+
if (boundary == WindowBoundary::CURRENT_ROW_GROUPS ||
849+
boundary == WindowBoundary::EXPR_FOLLOWING_GROUPS ||
850+
boundary == WindowBoundary::EXPR_PRECEDING_GROUPS) {
851+
return parse::WindowType::kRange;
852+
}
828853
return parse::WindowType::kRange;
829854
};
830855

@@ -839,12 +864,15 @@ parse::BoundType parseBoundType(WindowBoundary boundary) {
839864
switch (boundary) {
840865
case WindowBoundary::CURRENT_ROW_RANGE:
841866
case WindowBoundary::CURRENT_ROW_ROWS:
867+
case WindowBoundary::CURRENT_ROW_GROUPS:
842868
return parse::BoundType::kCurrentRow;
843869
case WindowBoundary::EXPR_PRECEDING_ROWS:
844870
case WindowBoundary::EXPR_PRECEDING_RANGE:
871+
case WindowBoundary::EXPR_PRECEDING_GROUPS:
845872
return parse::BoundType::kPreceding;
846873
case WindowBoundary::EXPR_FOLLOWING_ROWS:
847874
case WindowBoundary::EXPR_FOLLOWING_RANGE:
875+
case WindowBoundary::EXPR_FOLLOWING_GROUPS:
848876
return parse::BoundType::kFollowing;
849877
case WindowBoundary::UNBOUNDED_FOLLOWING:
850878
return parse::BoundType::kUnboundedFollowing;

velox/duckdb/conversion/tests/DuckParserTest.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -718,8 +718,7 @@ TEST(DuckParserTest, parseScalarOrWindowExpr) {
718718

719719
TEST(DuckParserTest, invalidExpression) {
720720
VELOX_ASSERT_THROW(
721-
parseExpr("func(a b)"),
722-
"Cannot parse expression: func(a b). Parser Error: syntax error at or near \"b\"");
721+
parseExpr("func(a b)"), "Cannot parse expression: func(a b).");
723722
}
724723

725724
TEST(DuckParserTest, parseDecimalConstant) {

velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
#include "velox/common/fuzzer/Utils.h"
2525
#include "velox/common/testutil/TempDirectoryPath.h"
2626
#include "velox/connectors/hive/HiveConnector.h"
27-
#include "velox/dwio/dwrf/RegisterDwrfReader.h" // @manual
28-
#include "velox/dwio/dwrf/RegisterDwrfWriter.h" // @manual
27+
#include "velox/dwio/dwrf/RegisterDwrfReader.h"
28+
#include "velox/dwio/dwrf/RegisterDwrfWriter.h"
2929
#include "velox/exec/MemoryReclaimer.h"
3030
#include "velox/exec/fuzzer/FuzzerUtil.h"
3131
#include "velox/exec/tests/utils/ArbitratorTestUtil.h"

0 commit comments

Comments
 (0)