diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 3bea2fb..55c7202 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,10 +14,10 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@6a7a4f24c5999355ab36c0a6835baf891fc9d522 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.2 with: - ci_tools_version: 1f00107ca1eaf1691049907296a8aa796d054e6b - duckdb_version: ad1273222186d28b4b351736ed88101044bbe97b + ci_tools_version: v1.3.2 + duckdb_version: v1.3.2 extension_name: vortex exclude_archs: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;windows_amd64;linux_arm64" extra_toolchains: "rust" diff --git a/.gitmodules b/.gitmodules index c47fc9c..75dabc5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,14 +1,14 @@ +[submodule "duckdb"] + path = duckdb + url = https://github.com/duckdb/duckdb.git [submodule "extension-ci-tools"] path = extension-ci-tools url = https://github.com/duckdb/extension-ci-tools branch = main +[submodule "vcpkg"] + path = vcpkg + url = https://github.com/microsoft/vcpkg.git [submodule "vortex"] path = vortex url = https://github.com/spiraldb/vortex branch = main -[submodule "duckdb"] - path = duckdb - url = https://github.com/spiraldb/duckdb.git -[submodule "vcpkg"] - path = vcpkg - url = git@github.com:microsoft/vcpkg.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 776934f..f2a3d2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,9 +6,6 @@ project(${TARGET_NAME}_project) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_STANDARD 17) -# Allow C++20 designator syntax in C++17 -add_compile_options(-Wno-c++20-designator) - include(FetchContent) FetchContent_Declare( Corrosion @@ -18,44 +15,64 @@ FetchContent_Declare( FetchContent_MakeAvailable(Corrosion) -find_package(Catch2 CONFIG REQUIRED) -find_package(Protobuf CONFIG REQUIRED) - if (APPLE) find_library(SECURITY_FRAMEWORK Security) + find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation) endif () +# Install clang and libclang for Linux Docker builds. macOS has both of these pre-installed. +if(EXISTS "/.dockerenv") + find_program(YUM_EXEC yum) + if(YUM_EXEC AND NOT INSTALL_SUCCESS) + execute_process( + COMMAND yum install -y clang-devel clang + RESULT_VARIABLE YUM_RESULT + ) + if(YUM_RESULT EQUAL 0) + set(INSTALL_SUCCESS TRUE) + endif() + endif() + + find_program(APK_EXEC apk) + if(APK_EXEC AND NOT INSTALL_SUCCESS) + execute_process( + COMMAND apk add --no-cache clang-dev clang + RESULT_VARIABLE APK_RESULT + ) + if(APK_RESULT EQUAL 0) + set(INSTALL_SUCCESS TRUE) + endif() + endif() +endif() + corrosion_import_crate(MANIFEST_PATH vortex/Cargo.toml CRATES vortex-ffi - FEATURES duckdb mimalloc + FEATURES mimalloc CRATE_TYPES staticlib FLAGS --crate-type=staticlib ) -set(EXTENSION_NAME ${TARGET_NAME}_extension) -set(EXTENSION_SOURCES src/vortex_extension.cpp src/vortex_expr.cpp src/vortex_write.cpp src/vortex_scan.cpp) -set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) +corrosion_import_crate(MANIFEST_PATH vortex/Cargo.toml + CRATES vortex-duckdb + CRATE_TYPES staticlib + FLAGS --crate-type=staticlib +) -# Generate C++ code from .proto files. -file(GLOB PROTO_FILES "vortex/vortex-proto/proto/*.proto") -set(PROTO_GEN_DIR ${CMAKE_CURRENT_SOURCE_DIR}/gen) -file(MAKE_DIRECTORY ${PROTO_GEN_DIR}) -protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${PROTO_FILES} PROTOC_OUT_DIR ${PROTO_GEN_DIR}) +include_directories(src/include vortex/vortex-duckdb/include) -include_directories(src/include ${PROTO_GEN_DIR} vortex/vortex-ffi/cinclude) +set(EXTENSION_NAME ${TARGET_NAME}_extension) +set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) -build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES} ${PROTO_SRCS}) -set(PARAMETERS "-warnings") -build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES} ${PROTO_SRCS}) +build_static_extension(${TARGET_NAME} src/vortex_extension.cpp) +build_loadable_extension(${TARGET_NAME} -warnings src/vortex_extension.cpp) target_link_libraries(${EXTENSION_NAME} + vortex_duckdb-static vortex_ffi-static - protobuf::libprotobuf ${SECURITY_FRAMEWORK} + ${CORE_FOUNDATION_FRAMEWORK} ) -add_subdirectory(test) - install( TARGETS ${EXTENSION_NAME} EXPORT "${DUCKDB_EXPORT_SET}" diff --git a/Makefile b/Makefile index f78516a..3017d3f 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,6 @@ PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) EXT_NAME=vortex_duckdb EXT_CONFIG=${PROJ_DIR}extension_config.cmake EXT_FLAGS=-DCMAKE_OSX_DEPLOYMENT_TARGET=12.0 - -export OVERRIDE_GIT_DESCRIBE=v1.3.2 export MACOSX_DEPLOYMENT_TARGET=12.0 export VCPKG_FEATURE_FLAGS=-binarycaching export VCPKG_OSX_DEPLOYMENT_TARGET=12.0 diff --git a/duckdb b/duckdb index d323876..0b83e5d 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit d323876d6a6d41847ac71b9d0966bc65a4bdf55f +Subproject commit 0b83e5d2f68bc02dfefde74b846bd039f078affa diff --git a/src/include/vortex.hpp b/src/include/vortex.hpp deleted file mode 100644 index 3a7a3e9..0000000 --- a/src/include/vortex.hpp +++ /dev/null @@ -1,3 +0,0 @@ -// This header prefixes the auto-generated vortex.h with #pragma once. -#pragma once -#include "vortex.h" diff --git a/src/include/vortex_common.hpp b/src/include/vortex_common.hpp deleted file mode 100644 index 67a55e8..0000000 --- a/src/include/vortex_common.hpp +++ /dev/null @@ -1,185 +0,0 @@ -#pragma once -#define ENABLE_DUCKDB_FFI - -#include "duckdb.hpp" -#include "duckdb/common/unique_ptr.hpp" - -#include "vortex.hpp" -#include "vortex_error.hpp" -#include "vortex_session.hpp" - -namespace vortex { - -struct DType { - explicit DType(const vx_dtype *dtype) : dtype(dtype) { - } - - static duckdb::unique_ptr FromDuckDBTable(const std::vector &column_types, - const std::vector &column_nullable, - const std::vector &column_names) { - D_ASSERT(column_names.size() == column_nullable.size()); - D_ASSERT(column_names.size() == column_types.size()); - - auto dtype = Try([&](auto err) { - return vx_duckdb_logical_type_to_dtype(column_types.data(), column_nullable.data(), column_names.data(), - column_names.size(), err); - }); - - return duckdb::make_uniq(dtype); - } - - ~DType() { - if (dtype != nullptr) { - vx_dtype_free(dtype); - } - } - - const vx_dtype *dtype; -}; - -struct VortexFile { - explicit VortexFile(const vx_file *file) : file(file) { - } - - ~VortexFile() { - vx_file_free(file); - } - - static duckdb::unique_ptr Open(const vx_file_open_options *options, VortexSession &session) { - auto file = Try([&](auto err) { return vx_file_open_reader(options, session.session, err); }); - return duckdb::make_uniq(file); - } - - vx_array_iterator *Scan(const vx_file_scan_options *options) { - return Try([&](auto err) { return vx_file_scan(this->file, options, err); }); - } - - bool CanPrune(const char *filter_expression, unsigned int filter_expression_len, unsigned long file_idx) { - return Try([&](auto err) { - return vx_file_can_prune(this->file, filter_expression, filter_expression_len, file_idx, err); - }); - } - - uint64_t RowCount() { - return vx_file_row_count(file); - } - - struct DType DType() { - return vortex::DType(vx_dtype_clone(vx_file_dtype(file))); - } - - const vx_file *file; -}; - - -struct Array { - explicit Array(const vx_array *array) : array(array) { - } - - ~Array() { - if (array != nullptr) { - vx_array_free(array); - } - } - - static duckdb::unique_ptr FromDuckDBChunk(DType &dtype, duckdb::DataChunk &chunk) { - auto array = Try([&](auto err) { - return vx_duckdb_chunk_to_array(reinterpret_cast(&chunk), dtype.dtype, err); - }); - - return duckdb::make_uniq(array); - } - - const vx_array *array; -}; - - -struct ArrayIterator { - explicit ArrayIterator(vx_array_iterator *array_iter) : array_iter(array_iter) { - } - - /// Releases ownership of the native array iterator ptr to the caller. The caller is then responsible for - /// eventually calling vx_array_iter_free. - /// - /// This ArrayIterator is useless after this call. - vx_array_iterator* release() { - auto* ptr = array_iter; - array_iter = nullptr; // Give up ownership - return ptr; - } - - ~ArrayIterator() { - if (array_iter) { - vx_array_iterator_free(array_iter); - } - } - - duckdb::unique_ptr NextArray() const { - auto array = Try([&](auto err) { return vx_array_iterator_next(array_iter, err); }); - - if (array == nullptr) { - return nullptr; - } - - return duckdb::make_uniq(array); - } - - vx_array_iterator *array_iter; -}; - - -struct ArrayExporter { - explicit ArrayExporter(vx_duckdb_exporter *exporter) : exporter(exporter) { - } - - ~ArrayExporter() { - if (exporter != nullptr) { - vx_duckdb_exporter_free(exporter); - } - } - - static duckdb::unique_ptr FromArrayIterator(duckdb::unique_ptr array_iter) { - auto exporter = vx_duckdb_exporter_new(array_iter->release()); - return duckdb::make_uniq(exporter); - } - - bool ExportNext(duckdb_data_chunk output) const { - return Try([&](auto err) { return vx_duckdb_exporter_next(exporter, output, err); }); - } - - vx_duckdb_exporter *exporter; -}; - - -struct ArrayStreamSink { - explicit ArrayStreamSink(vx_array_sink *sink, duckdb::unique_ptr dtype) - : sink(sink), dtype(std::move(dtype)) { - } - - static duckdb::unique_ptr Create(std::string file_path, duckdb::unique_ptr &&dtype) { - auto sink = Try([&](auto err) { return vx_array_sink_open_file(file_path.c_str(), dtype->dtype, err); }); - return duckdb::make_uniq(sink, std::move(dtype)); - } - - void PushChunk(duckdb::DataChunk &chunk) { - auto array = Array::FromDuckDBChunk(*dtype, chunk); - Try([&](auto err) { vx_array_sink_push(sink, array->array, err); }); - } - - void Close() { - Try([&](auto err) { vx_array_sink_close(sink, err); }); - this->sink = nullptr; - } - - ~ArrayStreamSink() { - // "should dctor a sink, before closing it - // If you throw during writes then the stack will be unwound and the destructor is going to be called before the - // close method is invoked thus triggering following assertion failure and will clobber the exception printing - // D_ASSERT(sink == nullptr); - } - - vx_array_sink *sink; - duckdb::unique_ptr dtype; -}; - -} // namespace vortex diff --git a/src/include/vortex_error.hpp b/src/include/vortex_error.hpp deleted file mode 100644 index 4d8e7ae..0000000 --- a/src/include/vortex_error.hpp +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include -#include - -#include "duckdb.hpp" -#include "vortex.hpp" - -namespace vortex { - -inline void HandleError(vx_error *error) { - if (error != nullptr) { - auto msg_str = vx_error_get_message(error); - auto msg = std::string(vx_string_ptr(msg_str), vx_string_len(msg_str)); - vx_error_free(error); - throw duckdb::InvalidInputException(msg); - } -} - -template -auto Try(Func func) { - vx_error *error = nullptr; - // Handle both void and non-void return types. - if constexpr (std::is_void_v>) { - func(&error); - HandleError(error); - } else { - auto result = func(&error); - HandleError(error); - return result; - } -} - -} // namespace vortex diff --git a/src/include/vortex_expr.hpp b/src/include/vortex_expr.hpp deleted file mode 100644 index a46336c..0000000 --- a/src/include/vortex_expr.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include "duckdb/planner/expression.hpp" -#include "duckdb/planner/table_filter.hpp" - -#include "expr.pb.h" - -namespace vortex { -// vortex expr proto ids. -const std::string BETWEEN_ID = "between"; -const std::string BINARY_ID = "binary"; -const std::string GET_ITEM_ID = "get_item"; -const std::string PACK_ID = "pack"; -const std::string VAR_ID = "var"; -const std::string LIKE_ID = "like"; -const std::string LITERAL_ID = "literal"; -const std::string NOT_ID = "not"; -const std::string LIST_CONTAINS_ID = "list_contains"; - -vortex::expr::Expr *table_expression_into_expr(google::protobuf::Arena &arena, duckdb::TableFilter &filter, - const std::string &column_name); -vortex::expr::Expr *expression_into_vortex_expr(google::protobuf::Arena &arena, const duckdb::Expression &expr); -vortex::expr::Expr *flatten_exprs(google::protobuf::Arena &arena, - const duckdb::vector &child_filters); -vortex::expr::Expr *pack_projection_columns(google::protobuf::Arena &arena, duckdb::vector columns); -} // namespace vortex diff --git a/src/include/vortex_scan.hpp b/src/include/vortex_scan.hpp deleted file mode 100644 index 66383ba..0000000 --- a/src/include/vortex_scan.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include "duckdb/main/extension_util.hpp" - -namespace vortex { -void RegisterScanFunction(duckdb::DatabaseInstance &instance); -} diff --git a/src/include/vortex_session.hpp b/src/include/vortex_session.hpp deleted file mode 100644 index c8ead2e..0000000 --- a/src/include/vortex_session.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include "vortex.hpp" -#include "duckdb/storage/object_cache.hpp" - -namespace vortex { - -class VortexSession : public duckdb::ObjectCacheEntry { -public: - VortexSession() : session(vx_session_new()) { - } - - ~VortexSession() override { - vx_session_free(session); - } - - vx_session *session; - - static std::string ObjectType() { - return "vortex_session_cache_metadata"; - } - - std::string GetObjectType() override { - return ObjectType(); - } -}; - -} // namespace vortex \ No newline at end of file diff --git a/src/include/vortex_write.hpp b/src/include/vortex_write.hpp deleted file mode 100644 index c8a0503..0000000 --- a/src/include/vortex_write.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include "duckdb/main/extension_util.hpp" - -namespace vortex { -void RegisterWriteFunction(duckdb::DatabaseInstance &instance); -} diff --git a/src/vortex_expr.cpp b/src/vortex_expr.cpp deleted file mode 100644 index bd0874f..0000000 --- a/src/vortex_expr.cpp +++ /dev/null @@ -1,548 +0,0 @@ -#include - -#include "duckdb/planner/expression.hpp" -#include "duckdb/planner/table_filter.hpp" -#include "duckdb/planner/filter/constant_filter.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/parser/expression/columnref_expression.hpp" -#include "duckdb/parser/expression/comparison_expression.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/planner/expression/bound_between_expression.hpp" -#include "duckdb/planner/expression/bound_columnref_expression.hpp" -#include "duckdb/planner/expression/bound_comparison_expression.hpp" -#include "duckdb/planner/expression/bound_constant_expression.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" -#include "duckdb/planner/expression/bound_operator_expression.hpp" -#include "duckdb/planner/filter/conjunction_filter.hpp" -#include "duckdb/planner/filter/optional_filter.hpp" - -#include "vortex_expr.hpp" - -#include "duckdb/planner/filter/in_filter.hpp" - -using duckdb::ConjunctionAndFilter; -using duckdb::ConjunctionOrFilter; -using duckdb::ConstantFilter; -using duckdb::Exception; -using duckdb::ExceptionType; -using duckdb::ExpressionType; -using duckdb::LogicalType; -using duckdb::LogicalTypeId; -using duckdb::TableFilter; -using duckdb::TableFilterType; -using duckdb::Value; -using google::protobuf::Arena; -using std::string; - -namespace vortex { - -// Temporal ids -const string VORTEX_DATE_ID = "vortex.date"; -const string VORTEX_TIME_ID = "vortex.time"; -const string VORTEX_TIMESTAMP_ID = "vortex.timestamp"; - -const string VX_ROW_ID_COL_ID = "$vx.row_id"; - -const string DUCKDB_FUNCTION_NAME_CONTAINS = "contains"; - -enum TimeUnit : uint8_t { - /// Nanoseconds - Ns = 0, - /// Microseconds - Us = 1, - /// Milliseconds - Ms = 2, - /// Seconds - S = 3, - /// Days - D = 4, -}; - -expr::Kind_BinaryOp into_binary_operation(ExpressionType type) { - static const std::unordered_map op_map = { - {ExpressionType::COMPARE_EQUAL, expr::Kind_BinaryOp_Eq}, - {ExpressionType::COMPARE_NOTEQUAL, expr::Kind_BinaryOp_NotEq}, - {ExpressionType::COMPARE_LESSTHAN, expr::Kind_BinaryOp_Lt}, - {ExpressionType::COMPARE_GREATERTHAN, expr::Kind_BinaryOp_Gt}, - {ExpressionType::COMPARE_LESSTHANOREQUALTO, expr::Kind_BinaryOp_Lte}, - {ExpressionType::COMPARE_GREATERTHANOREQUALTO, expr::Kind_BinaryOp_Gte}, - {ExpressionType::CONJUNCTION_AND, expr::Kind_BinaryOp_And}, - {ExpressionType::CONJUNCTION_OR, expr::Kind_BinaryOp_Or}}; - - auto value = op_map.find(type); - if (value == op_map.end()) { - throw Exception(ExceptionType::NOT_IMPLEMENTED, "into_binary_operation", - {{"id", std::to_string(static_cast(type))}}); - } - return value->second; -} - -TimeUnit timestamp_to_time_unit(const LogicalType &type) { - switch (type.id()) { - case LogicalTypeId::TIMESTAMP_SEC: - return TimeUnit::S; - case LogicalTypeId::TIMESTAMP_MS: - return TimeUnit::Ms; - case LogicalTypeId::TIMESTAMP: - return TimeUnit::Us; - case LogicalTypeId::TIMESTAMP_NS: - return TimeUnit::Ns; - default: - throw Exception(ExceptionType::INVALID, "timestamp_to_time_unit given none timestamp type", - {{"id", type.ToString()}}); - } -} - -dtype::DType *into_vortex_dtype(Arena &arena, const LogicalType &type_, bool nullable) { - auto *dtype = Arena::Create(&arena); - switch (type_.id()) { - case LogicalTypeId::INVALID: - case LogicalTypeId::SQLNULL: - dtype->mutable_null(); - return dtype; - case LogicalTypeId::BOOLEAN: - dtype->mutable_bool_()->set_nullable(nullable); - return dtype; - case LogicalTypeId::TINYINT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::I8); - return dtype; - case LogicalTypeId::SMALLINT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::I16); - return dtype; - case LogicalTypeId::INTEGER: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::I32); - return dtype; - case LogicalTypeId::BIGINT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::I64); - return dtype; - case LogicalTypeId::UTINYINT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::U8); - return dtype; - case LogicalTypeId::USMALLINT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::U16); - return dtype; - case LogicalTypeId::UINTEGER: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::U32); - return dtype; - case LogicalTypeId::UBIGINT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::U64); - return dtype; - case LogicalTypeId::FLOAT: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::F32); - return dtype; - case LogicalTypeId::DOUBLE: - dtype->mutable_primitive()->set_nullable(nullable); - dtype->mutable_primitive()->set_type(dtype::F64); - return dtype; - case LogicalTypeId::DECIMAL: { - dtype->mutable_decimal()->set_nullable(nullable); - auto decimal = dtype->mutable_decimal(); - decimal->set_precision(duckdb::DecimalType::GetWidth(type_)); - decimal->set_scale(duckdb::DecimalType::GetScale(type_)); - return dtype; - } - case LogicalTypeId::CHAR: - case LogicalTypeId::VARCHAR: - dtype->mutable_utf8()->set_nullable(nullable); - return dtype; - case LogicalTypeId::BLOB: - dtype->mutable_binary()->set_nullable(nullable); - return dtype; - case LogicalTypeId::DATE: { - dtype->mutable_extension()->set_id(VORTEX_DATE_ID); - auto storage = dtype->mutable_extension()->mutable_storage_dtype(); - storage->mutable_primitive()->set_nullable(nullable); - storage->mutable_primitive()->set_type(dtype::I32); - dtype->mutable_extension()->set_metadata(std::string({static_cast(TimeUnit::D)})); - return dtype; - } - case LogicalTypeId::TIME: { - dtype->mutable_extension()->set_id(VORTEX_TIME_ID); - auto storage = dtype->mutable_extension()->mutable_storage_dtype(); - storage->mutable_primitive()->set_nullable(nullable); - storage->mutable_primitive()->set_type(dtype::I32); - dtype->mutable_extension()->set_metadata(std::string({static_cast(TimeUnit::Us)})); - return dtype; - } - case LogicalTypeId::TIMESTAMP_SEC: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_NS: { - dtype->mutable_extension()->set_id(VORTEX_TIMESTAMP_ID); - auto storage = dtype->mutable_extension()->mutable_storage_dtype(); - storage->mutable_primitive()->set_nullable(nullable); - storage->mutable_primitive()->set_type(dtype::I64); - auto time_unit = static_cast(timestamp_to_time_unit(type_)); - // This signifies a timestamp without a timezone - // TODO(joe): support timezones - dtype->mutable_extension()->set_metadata(std::string({time_unit, 0, 0})); - return dtype; - } - default: - throw Exception(ExceptionType::NOT_IMPLEMENTED, "into_vortex_dtype", {{"id", type_.ToString()}}); - } -} - -scalar::Scalar *into_null_scalar(Arena &arena, LogicalType &logical_type) { - auto scalar = Arena::Create(&arena); - scalar->set_allocated_dtype(into_vortex_dtype(arena, logical_type, true)); - scalar->mutable_value()->set_null_value(google::protobuf::NULL_VALUE); - return scalar; -} - -scalar::ScalarValue *into_vortex_scalar_value(Arena &arena, const Value &value) { - auto scalar = Arena::Create(&arena); - - switch (value.type().id()) { - case LogicalTypeId::INVALID: - case LogicalTypeId::SQLNULL: { - scalar->set_null_value(google::protobuf::NULL_VALUE); - return scalar; - } - case LogicalTypeId::BOOLEAN: { - scalar->set_bool_value(value.GetValue()); - return scalar; - } - case LogicalTypeId::TINYINT: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::SMALLINT: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::INTEGER: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::BIGINT: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::UTINYINT: - scalar->set_uint64_value(value.GetValue()); - return scalar; - case LogicalTypeId::USMALLINT: - scalar->set_uint64_value(value.GetValue()); - return scalar; - case LogicalTypeId::UINTEGER: - scalar->set_uint64_value(value.GetValue()); - return scalar; - case LogicalTypeId::UBIGINT: - scalar->set_uint64_value(value.GetValue()); - return scalar; - case LogicalTypeId::FLOAT: - scalar->set_f32_value(value.GetValue()); - return scalar; - case LogicalTypeId::DOUBLE: - scalar->set_f64_value(value.GetValue()); - return scalar; - case LogicalTypeId::DECIMAL: { - auto huge = value.GetValue(); - uint32_t out[4]; - out[0] = static_cast(huge); - out[1] = static_cast(huge >> 32); - out[2] = static_cast(huge >> 64); - out[3] = static_cast(huge >> 96); - scalar->set_bytes_value(std::string(reinterpret_cast(out), 8)); - return scalar; - } - case LogicalTypeId::VARCHAR: - scalar->set_string_value(value.GetValue()); - return scalar; - case LogicalTypeId::DATE: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::TIME: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::TIMESTAMP_SEC: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::TIMESTAMP_MS: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::TIMESTAMP: - scalar->set_int64_value(value.GetValue()); - return scalar; - case LogicalTypeId::TIMESTAMP_NS: - scalar->set_int64_value(value.GetValue()); - return scalar; - default: - throw Exception(ExceptionType::NOT_IMPLEMENTED, "into_vortex_scalar", {{"id", value.ToString()}}); - } -} - -scalar::Scalar *into_vortex_scalar(Arena &arena, const Value &value, bool nullable) { - auto scalar = Arena::Create(&arena); - auto dtype = into_vortex_dtype(arena, value.type(), nullable); - scalar->set_allocated_dtype(dtype); - - auto scalar_value = into_vortex_scalar_value(arena, value); - scalar->mutable_value()->Swap(scalar_value); - return scalar; -} - -void set_column(const string &s, expr::Expr *column) { - - column->set_id(GET_ITEM_ID); - auto kind = column->mutable_kind(); - auto get_item = kind->mutable_get_item(); - get_item->mutable_path()->assign(s); - - auto id = column->add_children(); - id->set_id(VAR_ID); - if (s == "file_row_number" || s == "file_index") { - id->mutable_kind()->mutable_var()->set_var(VX_ROW_ID_COL_ID); - } else { - id->mutable_kind()->mutable_var()->set_var(""); - } -} - -void set_literal(Arena &arena, const Value &value, bool nullable, expr::Expr *constant) { - auto literal = constant->mutable_kind()->mutable_literal(); - auto dvalue = into_vortex_scalar(arena, value, nullable); - literal->set_allocated_value(dvalue); - constant->set_id(LITERAL_ID); -} - -expr::Expr *flatten_table_filters(Arena &arena, duckdb::vector> &child_filters, - expr::Kind_BinaryOp operation, const string &column_name) { - - D_ASSERT(!child_filters.empty()); - - if (child_filters.size() == 1) { - return table_expression_into_expr(arena, *child_filters[0], column_name); - } - - // Start with the first expression - auto tail = static_cast(nullptr); - auto hd = Arena::Create(&arena); - - // Flatten the list of children into a linked list of operation values. - for (size_t i = 0; i < child_filters.size() - 1; i++) { - expr::Expr *new_and = !tail ? hd : tail->add_children(); - new_and->set_id(BINARY_ID); - new_and->mutable_kind()->set_binary_op(operation); - new_and->add_children()->Swap(table_expression_into_expr(arena, *child_filters[i], column_name)); - - tail = new_and; - } - tail->add_children()->Swap(table_expression_into_expr(arena, *child_filters.back(), column_name)); - return hd; -} - -expr::Expr *flatten_exprs(Arena &arena, const duckdb::vector &child_filters) { - - if (child_filters.empty()) { - auto expr = arena.Create(&arena); - set_literal(arena, Value(true), true, expr); - return expr; - } - - if (child_filters.size() == 1) { - return child_filters[0]; - } - - // Start with the first expression - auto tail = static_cast(nullptr); - auto hd = Arena::Create(&arena); - - // Flatten the list of children into a linked list of AND values. - for (size_t i = 0; i < child_filters.size() - 1; i++) { - expr::Expr *new_and = !tail ? hd : tail->add_children(); - new_and->set_id(BINARY_ID); - new_and->mutable_kind()->set_binary_op(expr::Kind::And); - new_and->add_children()->Swap(child_filters[i]); - - tail = new_and; - } - tail->add_children()->Swap(child_filters.back()); - return hd; -} - -std::optional expr_to_like_pattern(const duckdb::Expression &dexpr) { - switch (dexpr.expression_class) { - case duckdb::ExpressionClass::BOUND_CONSTANT: { - auto &dconstant = dexpr.Cast(); - auto contains_pattern = dconstant.value.GetValue(); - auto like_pattern = "%" + contains_pattern + "%"; - return std::optional(like_pattern); - }; - default: - return std::nullopt; - } -} - -expr::Expr *expression_into_vortex_expr(Arena &arena, const duckdb::Expression &dexpr) { - auto expr = Arena::Create(&arena); - switch (dexpr.expression_class) { - case duckdb::ExpressionClass::BOUND_COLUMN_REF: { - auto &dcol_ref = dexpr.Cast(); - auto column = expr; - set_column(dcol_ref.GetName(), column); - return expr; - } - case duckdb::ExpressionClass::BOUND_CONSTANT: { - auto &dconstant = dexpr.Cast(); - set_literal(arena, dconstant.value, true, expr); - return expr; - } - case duckdb::ExpressionClass::BOUND_COMPARISON: { - auto &dcompare = dexpr.Cast(); - auto left = expr->add_children(); - left->Swap(expression_into_vortex_expr(arena, *dcompare.left)); - auto right = expr->add_children(); - right->Swap(expression_into_vortex_expr(arena, *dcompare.right)); - auto bin_op = into_binary_operation(dcompare.type); - expr->mutable_kind()->set_binary_op(bin_op); - expr->set_id(BINARY_ID); - return expr; - } - case duckdb::ExpressionClass::BOUND_BETWEEN: { - auto &dbetween = dexpr.Cast(); - auto col = expression_into_vortex_expr(arena, *dbetween.input); - auto lower = expression_into_vortex_expr(arena, *dbetween.lower); - auto upper = expression_into_vortex_expr(arena, *dbetween.upper); - // Between order on vx is arr, lower, upper. - expr->add_children()->Swap(col); - expr->add_children()->Swap(lower); - expr->add_children()->Swap(upper); - auto kind = expr->mutable_kind()->mutable_between(); - kind->set_lower_strict(!dbetween.lower_inclusive); - kind->set_upper_strict(!dbetween.upper_inclusive); - expr->set_id(BETWEEN_ID); - return expr; - } - case duckdb::ExpressionClass::BOUND_OPERATOR: { - auto &dop = dexpr.Cast(); - if (dop.type != ExpressionType::OPERATOR_NOT) { - return nullptr; - } - auto child = expr->add_children(); - auto fn = expression_into_vortex_expr(arena, *dop.children[0]); - if (fn == nullptr) { - return nullptr; - } - child->Swap(fn); - expr->mutable_kind()->mutable_not_(); - expr->set_id(NOT_ID); - return expr; - } - case duckdb::ExpressionClass::BOUND_FUNCTION: { - auto &dfunc_expr = dexpr.Cast(); - auto &dfunc = dfunc_expr.function; - if (dfunc.name == DUCKDB_FUNCTION_NAME_CONTAINS) { - assert(dfunc_expr.children.size() == 2); - // value - expr->add_children()->Swap(expression_into_vortex_expr(arena, *dfunc_expr.children[0])); - // pattern - auto pattern = expr->add_children(); - - auto pattern_value = expr_to_like_pattern(*dfunc_expr.children[1]); - if (!pattern_value.has_value()) { - return nullptr; - } - set_literal(arena, Value(pattern_value.value()), true, pattern); - auto like = expr->mutable_kind()->mutable_like(); - like->set_case_insensitive(false); - like->set_negated(false); - expr->set_id(LIKE_ID); - return expr; - } - return nullptr; - } - default: - return nullptr; - } -} - -void set_list_element(Arena &arena, expr::Expr *list, duckdb::vector &values) { - list->set_id(LITERAL_ID); - auto ll = list->mutable_kind()->mutable_literal(); - auto scalar = ll->mutable_value(); - auto elem_type = into_vortex_dtype(arena, values[0].GetTypeMutable(), true); - auto list_type = scalar->mutable_dtype()->mutable_list(); - list_type->mutable_element_type()->Swap(elem_type); - list_type->set_nullable(true); - auto list_scalar_value = scalar->mutable_value()->mutable_list_value(); - for (auto &elem : values) { - list_scalar_value->mutable_values()->Add(std::move(*into_vortex_scalar_value(arena, elem))); - } -} - -expr::Expr *table_expression_into_expr(Arena &arena, TableFilter &filter, const string &column_name) { - auto expr = Arena::Create(&arena); - switch (filter.filter_type) { - case TableFilterType::CONSTANT_COMPARISON: { - auto &constant_filter = filter.Cast(); - auto bin_op = into_binary_operation(constant_filter.comparison_type); - - set_column(column_name, expr->add_children()); - set_literal(arena, constant_filter.constant, true, expr->add_children()); - - expr->mutable_kind()->set_binary_op(bin_op); - expr->set_id(BINARY_ID); - return expr; - } - case TableFilterType::CONJUNCTION_OR: { - auto &disjuncts = filter.Cast(); - return flatten_table_filters(arena, disjuncts.child_filters, expr::Kind_BinaryOp_Or, column_name); - } - case TableFilterType::CONJUNCTION_AND: { - auto &conjucts = filter.Cast(); - - return flatten_table_filters(arena, conjucts.child_filters, expr::Kind_BinaryOp_And, column_name); - } - case TableFilterType::IS_NULL: - case TableFilterType::IS_NOT_NULL: { - throw Exception(ExceptionType::NOT_IMPLEMENTED, "null checks"); - } - case TableFilterType::OPTIONAL_FILTER: { - auto *expr_o = - table_expression_into_expr(arena, *filter.Cast().child_filter, column_name); - if (expr_o != nullptr) { - return expr_o; - } - expr->set_id(LITERAL_ID); - auto lit = expr->mutable_kind()->mutable_literal(); - lit->mutable_value()->mutable_value()->set_bool_value(true); - lit->mutable_value()->mutable_dtype()->mutable_bool_()->set_nullable(false); - return expr; - } - case TableFilterType::IN_FILTER: { - auto &in_list_filter = filter.Cast(); - expr->set_id(LIST_CONTAINS_ID); - expr->mutable_kind()->mutable_list_contains(); - auto list = expr->add_children(); - set_list_element(arena, list, in_list_filter.values); - set_column(column_name, expr->add_children()); - return expr; - } - default: - break; - } - std::cout << "table expr: " << std::to_string(static_cast(filter.filter_type)) << filter.DebugToString() - << std::endl; - throw Exception(ExceptionType::NOT_IMPLEMENTED, "table_expression_into_expr", - {{"filter_type_id", std::to_string(static_cast(filter.filter_type))}}); -} - -vortex::expr::Expr *pack_projection_columns(google::protobuf::Arena &arena, duckdb::vector columns) { - auto expr = arena.Create(&arena); - expr->set_id(PACK_ID); - auto pack_paths = expr->mutable_kind()->mutable_pack()->mutable_paths(); - for (auto &columnn : columns) { - set_column(columnn, expr->add_children()); - pack_paths->Add(std::string(columnn)); - } - - return expr; -} - -} // namespace vortex diff --git a/src/vortex_extension.cpp b/src/vortex_extension.cpp index b6d120a..19edace 100644 --- a/src/vortex_extension.cpp +++ b/src/vortex_extension.cpp @@ -1,25 +1,21 @@ #define DUCKDB_EXTENSION_MAIN -#include "duckdb/main/extension_util.hpp" - #include "vortex_extension.hpp" -#include "vortex_write.hpp" -#include "vortex_scan.hpp" +#include "vortex.h" using namespace duckdb; -// The entry point class API can't be scoped to the vortex namespace. +static void LoadInternal(DatabaseInstance &db_instance) { + vortex_init(reinterpret_cast(&db_instance)); +} /// Called when the extension is loaded by DuckDB. /// It is responsible for registering functions and initializing state. /// /// Specifically, the `read_vortex` table function enables reading data from /// Vortex files in SQL queries. -void VortexExtension::Load(DuckDB &db) { - DatabaseInstance &instance = *db.instance; - - vortex::RegisterWriteFunction(instance); - vortex::RegisterScanFunction(instance); +void VortexExtension::Load(duckdb::DuckDB &db) { + LoadInternal(*db.instance); } /// Returns the name of the Vortex extension. @@ -36,18 +32,9 @@ std::string VortexExtension::Name() { //! Returns the version of the Vortex extension. std::string VortexExtension::Version() const { - return "0.1.0"; + return "0.41.2"; } -extern "C" { -__attribute__((__visibility__("default"))) -DUCKDB_EXTENSION_API void vortex_init(duckdb::DatabaseInstance &db) { - duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); -} - -__attribute__((__visibility__("default"))) -DUCKDB_EXTENSION_API const char *vortex_version() { - return duckdb::DuckDB::LibraryVersion(); -} -} +#ifndef DUCKDB_EXTENSION_MAIN +#error DUCKDB_EXTENSION_MAIN not defined +#endif diff --git a/src/vortex_scan.cpp b/src/vortex_scan.cpp deleted file mode 100644 index 8eae203..0000000 --- a/src/vortex_scan.cpp +++ /dev/null @@ -1,621 +0,0 @@ -#define ENABLE_DUCKDB_FFI - -#include -#include -#include -#include -#include -#include - -#include "duckdb/common/exception.hpp" -#include "duckdb/common/helper.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/main/extension_util.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/multi_file/multi_file_list.hpp" -#include "duckdb/storage/object_cache.hpp" - -#include "concurrentqueue.h" - -#include "vortex.hpp" -#include "vortex_scan.hpp" -#include "vortex_common.hpp" -#include "vortex_expr.hpp" -#include "vortex_session.hpp" -#include "duckdb/common/multi_file/multi_file_reader.hpp" -#include "duckdb/function/table/table_scan.hpp" -#include "duckdb/planner/filter/dynamic_filter.hpp" -#include "duckdb/planner/filter/optional_filter.hpp" - -using namespace duckdb; - -namespace vortex { - -static constexpr column_t _COLUMN_IDENTIFIER_FILE_ROW_NUMBER = UINT64_C(9223372036854775809); -static constexpr column_t _COLUMN_IDENTIFIER_FILE_INDEX = UINT64_C(9223372036854775810); - -/// Bind data for the Vortex table function that holds information about the -/// file and its schema. This data is populated during the bind phase, which -/// happens during the query planning phase. -struct ScanBindData : public TableFunctionData { - // Session used to caching - shared_ptr session; - - shared_ptr file_list; - vector columns_types; - vector column_names; - map virtual_col; - - // Used to read the schema during the bind phase and cached here to - // avoid having to open the same file again during the scan phase. - shared_ptr initial_file; - - // Used to create an arena for protobuf exprs, need a ptr since the bind arg is const. - unique_ptr arena; - vector conjuncts; - - bool Equals(const FunctionData &other_p) const override { - auto &other = other_p.Cast(); - return file_list == other.file_list && column_names == other.column_names && - columns_types == other.columns_types; - } - - unique_ptr Copy() const override { - auto result = make_uniq(); - result->arena = make_uniq(); - result->session = session; - result->file_list = file_list; - result->columns_types = columns_types; - result->column_names = column_names; - result->initial_file = initial_file; - result->virtual_col = virtual_col; - return std::move(result); - } - - std::string &ColumnName(column_t col) { - if (col < column_names.size()) { - return column_names[col]; - } - - return virtual_col[col]; - } -}; - -struct ScanPartition { - uint64_t file_idx; - uint64_t start_row; - uint64_t end_row; -}; - -/// Local state for the Vortex table function that tracks the progress of a scan -/// operation. In DuckDB's execution model, a query reading from a file can be -/// parallelized by dividing it into ranges, each handled by a different scan. -struct ScanLocalState : public LocalTableFunctionState { - bool finished; - - unique_ptr array_exporter; - - std::queue scan_partitions; - - // Thread local file. - std::optional thread_local_file_idx; -}; - -struct ScanGlobalState : public GlobalTableFunctionState { - std::atomic_uint64_t cache_id; - - vector expanded_files; - - optional_ptr filter; - // The precomputed filter string used in the query - std::string static_filter_str; - // Any dynamic filter contained in the query - map> dynamic_filters; - // Static filter expression, owned by the arena - expr::Expr *static_filter_expr; - - // Limited to indicate progress in `table_scan_progress`. - std::atomic_uint32_t partitions_processed; - std::atomic_uint32_t partitons_total; - - // Number of files which have are fully partitioned. - std::atomic_uint32_t files_partitioned; - - // Next file to partition. - std::atomic_uint32_t next_file_idx; - - // Multi producer, multi consumer lockfree queue. - duckdb_moodycamel::ConcurrentQueue scan_partitions {8192}; - - std::vector> files; - - // The column idx that must be returned by the scan. - vector column_ids; - vector projection_ids; - // The precomputed column names used in the query. - std::string projection; - - // This is the max number threads that the extension might use. - idx_t MaxThreads() const override { - constexpr uint32_t MAX_THREAD_COUNT = 192; - return MAX_THREAD_COUNT; - } - - // Return the conjunction of the static and dynamic filters, if either exist. - // The dynamic filter can be updated and so we recompute the filter if there is an - // active dyn filter. - // TODO(joe): cache the dyn filter expr if the dynamic filters have not changed. - std::string filter_expression_string(google::protobuf::Arena &arena) { - if (dynamic_filters.empty()) { - return static_filter_str; - } - vector conjs; - for (auto &[col_name, filter] : dynamic_filters) { - auto g = lock_guard(filter->lock); - if (!filter->initialized) { - continue; - } - auto conj = table_expression_into_expr(arena, *filter->filter, col_name); - if (conj) { - conjs.push_back(conj); - } - } - if (conjs.empty()) { - return static_filter_expr->SerializeAsString(); - } - auto dynamic_expr = flatten_exprs(arena, conjs); - auto expr = arena.Create(&arena); - expr->set_id(BINARY_ID); - expr->mutable_kind()->set_binary_op(expr::Kind::And); - expr->add_children()->Swap(dynamic_expr); - expr->add_children()->CopyFrom(*static_filter_expr); - return expr->SerializeAsString(); - } -}; - -// Use to create vortex expressions from `TableFilterSet` filter. -void ExtractFilterExpression(google::protobuf::Arena &arena, ScanBindData &data, - optional_ptr filter_set, vector column_ids, - vector &conjuncts, map> &dyn_filters) { - if (filter_set == nullptr) { - return; - } - - for (const auto &[col_id, value] : filter_set->filters) { - auto column_name = data.ColumnName(column_ids[col_id]); - - // Extract the optional dynamic filter, this seems like the only way that - // duckdb will use dynamic filters. - if (value->filter_type == TableFilterType::OPTIONAL_FILTER) { - auto &opt_filter = value->Cast().child_filter; - if (opt_filter->filter_type == TableFilterType::DYNAMIC_FILTER) { - dyn_filters.emplace(column_name, opt_filter->Cast().filter_data); - continue; - } - } - auto conj = table_expression_into_expr(arena, *value, column_name); - conjuncts.push_back(conj); - } -} - -static void PopulateProjection(ScanBindData &bind_data, ScanGlobalState &global_state, TableFunctionInitInput &input) { - global_state.projection_ids.reserve(input.projection_ids.size()); - for (auto proj_id : input.projection_ids) { - global_state.projection_ids.push_back(input.column_ids[proj_id]); - } - - auto vec = duckdb::vector(); - for (auto column_id : global_state.projection_ids) { - vec.push_back(bind_data.ColumnName(column_id)); - } - auto expr = pack_projection_columns(*bind_data.arena, vec); - global_state.projection = expr->SerializeAsString(); -} - -/// Extracts schema information from a Vortex file's data type. -static void ExtractVortexSchema(DType &file_dtype, vector &column_types, vector &column_names) { - auto struct_dtype = vx_dtype_struct_dtype(file_dtype.dtype); - uint32_t field_count = vx_struct_fields_nfields(struct_dtype); - for (uint32_t idx = 0; idx < field_count; idx++) { - auto vx_field_name = vx_struct_fields_field_name(struct_dtype, idx); - std::string field_name(vx_string_ptr(vx_field_name), vx_string_len(vx_field_name)); - - const vx_dtype *field_dtype = vx_struct_fields_field_dtype(struct_dtype, idx); - auto duckdb_type = Try([&](auto err) { return vx_dtype_to_duckdb_logical_type(field_dtype, err); }); - - column_names.push_back(field_name); - column_types.push_back(LogicalType(*reinterpret_cast(duckdb_type))); - vx_dtype_free(field_dtype); - duckdb_destroy_logical_type(&duckdb_type); - } -} - -std::string EnsureFileProtocol(FileSystem &fs, const std::string &path) { - // If the path is a URL then don't change it, otherwise try to make the path an absolute path - static const std::regex schema_prefix = std::regex("^[^/]*:\\/\\/.*$"); - if (std::regex_match(path, schema_prefix)) { - return path; - } - - const std::string prefix = "file://"; - if (fs.IsPathAbsolute(path)) { - return prefix + path; - } - - const auto absolute_path = fs.JoinPath(fs.GetWorkingDirectory(), path); - return prefix + absolute_path; -} - -static unique_ptr OpenFile(const std::string &filename, VortexSession &session, - vector &column_types, vector &column_names) { - vx_file_open_options options { - .uri = filename.c_str(), .property_keys = nullptr, .property_vals = nullptr, .property_len = 0}; - - auto file = VortexFile::Open(&options, session); - if (!file) { - throw IOException("Failed to open Vortex file: " + filename); - } - - // This pointer is owned by the file. - auto file_dtype = file->DType(); - if (vx_dtype_get_variant(file_dtype.dtype) != DTYPE_STRUCT) { - vx_file_free(file->file); - throw FatalException("Vortex file does not contain a struct array as a top-level dtype"); - } - - ExtractVortexSchema(file_dtype, column_types, column_names); - - return file; -} - -// Verifies that a new Vortex file's schema matches the expected schema from the bind phase. -// -// This function ensures schema consistency across all the files in a multi-file query. -// It compares the column types and names extracted from a new file against the schema -// obtained from the first file (stored in bind_data). -static void VerifyNewFile(const ScanBindData &bind_data, vector &column_types, - vector &column_names) { - if (column_types.size() != bind_data.columns_types.size() || column_names != bind_data.column_names) { - throw FatalException("Vortex file does not contain the same number of columns as the first"); - } - - for (size_t idx = 0; idx < bind_data.columns_types.size(); ++idx) { - if (bind_data.column_names[idx] != column_names[idx]) { - throw FatalException("Vortex file contains a column with a different name to the first"); - } - if (bind_data.columns_types[idx] != column_types[idx]) { - throw FatalException("Vortex file contains a column with a different type to the first"); - } - } -} - -static unique_ptr OpenFileAndVerify(FileSystem &fs, VortexSession &session, const std::string &filename, - const ScanBindData &bind_data) { - auto new_column_names = vector(); - new_column_names.reserve(bind_data.column_names.size()); - - auto new_column_types = vector(); - new_column_types.reserve(bind_data.columns_types.size()); - - auto file = OpenFile(EnsureFileProtocol(fs, filename), session, new_column_types, new_column_names); - VerifyNewFile(bind_data, new_column_types, new_column_names); - return file; -} - -static bool PinFileToThread(ScanGlobalState &global_state) { - // This is an approximation to determine whether we should switch to - // distributing partitions of the same file across threads and does - // not need to be exact in terms of how many threads DuckDB actually uses. - const auto thread_count = std::thread::hardware_concurrency(); - const auto file_count = global_state.expanded_files.size(); - return (file_count - global_state.files_partitioned) > thread_count; -} - -static void CreateScanPartitions(ClientContext &context, const ScanBindData &bind, ScanGlobalState &global_state, - ScanLocalState &local_state, uint64_t file_idx, VortexFile &file) { - auto filter_str = global_state.filter_expression_string(*bind.arena); - if (global_state.files[file_idx]->CanPrune(filter_str.data(), static_cast(filter_str.length()), - file_idx)) { - global_state.files_partitioned += 1; - return; - } - - const auto row_count = vx_file_row_count(file.file); - - const auto thread_count = std::thread::hardware_concurrency(); - const auto file_count = global_state.expanded_files.size(); - - // This is a multiple of the 2048 DuckDB vector size: - // - // Factors to consider: - // 1. A smaller value means more work for the Vortex file reader. - // 2. A larger value reduces the parallelism available to the scanner - const uint64_t partition_size = 2048 * (thread_count > file_count ? 32 : 64); - - const auto partition_count = std::max(static_cast(1), row_count / partition_size); - global_state.partitons_total += partition_count; - const bool pin_file_to_thread = PinFileToThread(global_state); - - if (pin_file_to_thread) { - local_state.thread_local_file_idx = file_idx; - } - - for (size_t partition_idx = 0; partition_idx < partition_count; ++partition_idx) { - const auto scan_partition = ScanPartition { - .file_idx = file_idx, - .start_row = partition_idx * partition_size, - .end_row = (partition_idx + 1) == partition_count ? row_count : (partition_idx + 1) * partition_size, - }; - - if (pin_file_to_thread) { - local_state.scan_partitions.push(scan_partition); - } else { - global_state.scan_partitions.enqueue(scan_partition); - } - } - - global_state.files_partitioned += 1; - D_ASSERT(global_state.files_partitioned <= global_state.expanded_files.size()); -} - -static unique_ptr OpenArrayIter(const ScanBindData &bind, ScanGlobalState &global_state, - shared_ptr &file, ScanPartition row_range_partition) { - auto filter_str = global_state.filter_expression_string(*bind.arena); - - const auto options = - vx_file_scan_options {.projection_expression = global_state.projection.data(), - .projection_expr_len = static_cast(global_state.projection.length()), - .filter_expression = filter_str.data(), - .filter_expression_len = static_cast(filter_str.length()), - .split_by_row_count = 0, - .row_range_start = row_range_partition.start_row, - .row_range_end = row_range_partition.end_row, - .file_index = row_range_partition.file_idx}; - - return make_uniq(file->Scan(&options)); -} - -// Assigns the next array exporter. -// -// Returns true if a new exporter was assigned, false otherwise. -static bool GetNextExporter(ClientContext &context, const ScanBindData &bind_data, ScanGlobalState &global_state, - ScanLocalState &local_state) { - - // Try to deque a partition off the thread local queue. - auto try_dequeue = [&](ScanPartition &scan_partition) { - if (local_state.scan_partitions.empty()) { - return false; - } - - scan_partition = local_state.scan_partitions.front(); - local_state.scan_partitions.pop(); - return true; - }; - - if (local_state.array_exporter == nullptr) { - ScanPartition partition; - - if (bool success = (try_dequeue(partition) || global_state.scan_partitions.try_dequeue(partition)); !success) { - - // Check whether all partitions have been processed. - if (global_state.files_partitioned == global_state.expanded_files.size()) { - - // A new partition might have been created after the first pop. Therefore, - // one more pop is necessary to ensure no more partitions are left to process. - if (success = global_state.scan_partitions.try_dequeue(partition); !success) { - local_state.finished = true; - return false; - } - } - - if (!success) { - return false; - } - } - - // Layout readers are safe to share across threads for reading. Further, they - // are created before pushing partitions of the corresponing files into a queue. - auto file = global_state.files[partition.file_idx]; - auto array_iter = OpenArrayIter(bind_data, global_state, file, partition); - local_state.array_exporter = ArrayExporter::FromArrayIterator(std::move(array_iter)); - } - - return true; -} - -static void VortexScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output) { - auto &bind_data = data.bind_data->Cast(); - auto &global_state = data.global_state->Cast(); - auto &local_state = data.local_state->Cast(); - - while (true) { - if (local_state.array_exporter != nullptr) { - if (local_state.array_exporter->ExportNext(reinterpret_cast(&output))) { - // Successfully exported a chunk - return; - } else { - // Otherwise, reset the exporter and try the next one. - global_state.partitions_processed += 1; - local_state.array_exporter = nullptr; - } - } - - if (!local_state.finished) { - // Try to get the next exporter, if we fail, make progress on partitions and then loop. - if (!GetNextExporter(context, bind_data, global_state, local_state)) { - // Free file readers when owned by the thread. - if (local_state.scan_partitions.empty() && local_state.thread_local_file_idx.has_value()) { - global_state.files[local_state.thread_local_file_idx.value()] = nullptr; - local_state.thread_local_file_idx.reset(); - } - - // Create new scan partitions in case the queue is empty. - if (auto file_idx = global_state.next_file_idx.fetch_add(1); - file_idx < global_state.expanded_files.size()) { - if (file_idx == 0) { - global_state.files[0] = bind_data.initial_file; - } else { - auto file_name = global_state.expanded_files[file_idx]; - global_state.files[file_idx] = OpenFileAndVerify(FileSystem::GetFileSystem(context), - *bind_data.session, file_name, bind_data); - } - - CreateScanPartitions(context, bind_data, global_state, local_state, file_idx, - *global_state.files[file_idx]); - } - } - continue; - } - - // Otherwise, we're truly done. - output.Reset(); - return; - } -} - -/// The bind function (for the Vortex table function) is called during query -/// planning. The bind phase happens once per query and allows DuckDB to know -/// the schema of the data before execution begins. This enables optimizations -/// like projection pushdown and predicate pushdown. -static unique_ptr VortexBind(ClientContext &context, TableFunctionBindInput &input, - vector &column_types, vector &column_names) { - auto result = make_uniq(); - result->arena = make_uniq(); - - const static string VortexExtensionKey = std::string("vortex_extension:vortex_session"); - auto session = ObjectCache::GetObjectCache(context).Get(VortexExtensionKey); - if (session == nullptr) { - ObjectCache::GetObjectCache(context).Put(VortexExtensionKey, make_shared_ptr()); - session = ObjectCache::GetObjectCache(context).Get(VortexExtensionKey); - } - - result->session = session; - - auto file_glob_strings = duckdb::vector {input.inputs[0].GetValue()}; - auto file_glob = duckdb::vector(file_glob_strings.begin(), file_glob_strings.end()); - result->file_list = make_shared_ptr(context, file_glob, FileGlobOptions::DISALLOW_EMPTY); - - // Open the first file to extract the schema. - auto filename = EnsureFileProtocol(FileSystem::GetFileSystem(context), result->file_list->GetFirstFile().path); - result->initial_file = OpenFile(filename, *result->session, column_types, column_names); - - result->column_names = column_names; - result->columns_types = column_types; - - return std::move(result); -} - -unique_ptr VortexCardinality(ClientContext &context, const FunctionData *bind_data) { - auto &data = bind_data->Cast(); - - auto row_count = data.initial_file->RowCount(); - if (data.file_list->GetTotalFileCount() == 1) { - return make_uniq(row_count, row_count); - } else { - return make_uniq(row_count * data.file_list->GetTotalFileCount()); - } -} - -// Removes all filter expressions (from `filters`) which can be pushed down. -void PushdownComplexFilter(ClientContext &context, LogicalGet &get, FunctionData *bind_data, - vector> &filters) { - if (filters.empty()) { - return; - } - - auto &bind = bind_data->Cast(); - bind.conjuncts.reserve(filters.size()); - - // Delete filters here so they are not given to used the create global state callback. - for (auto iter = filters.begin(); iter != filters.end();) { - auto expr = expression_into_vortex_expr(*bind.arena, *iter->get()); - if (expr != nullptr) { - bind.conjuncts.push_back(expr); - - iter = filters.erase(iter); - } else { - ++iter; - } - } -} - -void RegisterScanFunction(DatabaseInstance &instance) { - - TableFunction vortex_scan("read_vortex", {LogicalType::VARCHAR}, VortexScanFunction, VortexBind); - - vortex_scan.init_global = [](ClientContext &context, - TableFunctionInitInput &input) -> unique_ptr { - auto &bind = input.bind_data->CastNoConst(); - auto global_state = make_uniq(); - - // TODO(joe): do this expansion gradually in the scan to avoid a slower start. - auto file_infos = bind.file_list->GetAllFiles(); - global_state->expanded_files.reserve(file_infos.size()); - for (const auto &file_info : file_infos) { - global_state->expanded_files.push_back(file_info.path); - } - global_state->filter = input.filters; - global_state->column_ids = input.column_ids; - - PopulateProjection(bind, *global_state, input); - - // Most expressions are extracted from `PushdownComplexFilter`, the final filters come from `input.filters`. - ExtractFilterExpression(*bind.arena, bind, input.filters, input.column_ids, bind.conjuncts, - global_state->dynamic_filters); - - // Create the static filter expression - global_state->static_filter_expr = flatten_exprs(*bind.arena, bind.conjuncts); - if (global_state->static_filter_expr != nullptr) { - global_state->static_filter_str = global_state->static_filter_expr->SerializeAsString(); - } - - // Resizing the empty vector default constructs std::shared pointers at all indices with nullptr. - global_state->files.resize(global_state->expanded_files.size()); - - return std::move(global_state); - }; - - vortex_scan.init_local = [](ExecutionContext &context, TableFunctionInitInput &input, - GlobalTableFunctionState *global_state) -> unique_ptr { - return make_uniq(); - }; - - vortex_scan.table_scan_progress = [](ClientContext &context, const FunctionData *bind_data, - const GlobalTableFunctionState *global_state) -> double { - auto &gstate = global_state->Cast(); - return 100.0 * (static_cast(gstate.partitions_processed) / static_cast(gstate.partitons_total)); - }; - - vortex_scan.pushdown_complex_filter = PushdownComplexFilter; - vortex_scan.projection_pushdown = true; - vortex_scan.cardinality = VortexCardinality; - vortex_scan.filter_pushdown = true; - vortex_scan.filter_prune = true; - vortex_scan.late_materialization = true; - - vortex_scan.get_row_id_columns = [](ClientContext &context, - optional_ptr bind_data) -> vector { - vector result; - result.emplace_back(_COLUMN_IDENTIFIER_FILE_ROW_NUMBER); - result.emplace_back(_COLUMN_IDENTIFIER_FILE_INDEX); - return result; - }; - vortex_scan.get_virtual_columns = [](ClientContext &context, - optional_ptr bind_data) -> virtual_column_map_t { - auto &scan_bind_data = bind_data->Cast(); - virtual_column_map_t result; - - result.insert( - make_pair(_COLUMN_IDENTIFIER_FILE_ROW_NUMBER, TableColumn("file_row_number", LogicalType::UBIGINT))); - result.insert(make_pair(_COLUMN_IDENTIFIER_FILE_INDEX, TableColumn("file_index", LogicalType::UBIGINT))); - - scan_bind_data.virtual_col[_COLUMN_IDENTIFIER_FILE_ROW_NUMBER] = "file_row_number"; - scan_bind_data.virtual_col[_COLUMN_IDENTIFIER_FILE_INDEX] = "file_index"; - - return result; - }; - - ExtensionUtil::RegisterFunction(instance, vortex_scan); -} - -} // namespace vortex diff --git a/src/vortex_write.cpp b/src/vortex_write.cpp deleted file mode 100644 index 8eefd0d..0000000 --- a/src/vortex_write.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/main/extension_util.hpp" -#include "duckdb/function/copy_function.hpp" -#include "duckdb/parser/constraints/not_null_constraint.hpp" - -#include "vortex_write.hpp" -#include "vortex_common.hpp" - -// TODO(joe): enable multi-threaded writes, see `WriteSink`. - -using namespace duckdb; - -namespace vortex { - -struct WriteBindData : public TableFunctionData { - //! True is the column is nullable - vector column_nullable; - - vector sql_types; - vector column_names; -}; - -struct WriteGlobalData : public GlobalFunctionData { - unique_ptr sink; -}; - -struct WriteLocalData : public LocalFunctionData {}; - -void WriteSink(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input) { - auto &global_state = gstate.Cast(); - auto bind = bind_data.Cast(); - - for (auto i = 0u; i < input.ColumnCount(); i++) { - input.data[i].Flatten(input.size()); - } - // TODO(joe): go to a model of combining local chunked into arrays of a specific size - // before push each of these larger chunks into the global_state - global_state.sink->PushChunk(input); -} - -std::vector TableNullability(ClientContext &context, const string &catalog_name, const string &schema, - const string &table) { - auto &catalog = Catalog::GetCatalog(context, catalog_name); - - QueryErrorContext error_context; - // Main is the default schema - auto schema_name = schema != "" ? schema : "main"; - - auto entry = catalog.GetEntry(context, schema_name, table, OnEntryNotFound::RETURN_NULL); - auto vec = std::vector(); - // entry can non-null and not a table entry - if (!entry || entry->type != CatalogType::TABLE_ENTRY) { - // If there is no entry, it is okay to return all nullable columns. - return vec; - } - - auto &table_entry = entry->Cast(); - for (auto &constraint : table_entry.GetConstraints()) { - if (constraint && constraint->type == ConstraintType::NOT_NULL) { - auto &null_constraint = constraint->Cast(); - vec.push_back(null_constraint.index.index); - } - } - return vec; -} - -void RegisterWriteFunction(DatabaseInstance &instance) { - CopyFunction function("vortex"); - function.copy_to_bind = [](ClientContext &context, CopyFunctionBindInput &input, const vector &names, - const vector &sql_types) -> unique_ptr { - auto result = make_uniq(); - - auto not_null = TableNullability(context, input.info.catalog, input.info.schema, input.info.table); - - result->column_nullable = std::vector(names.size(), true); - for (auto not_null_idx : not_null) { - result->column_nullable[not_null_idx] = false; - } - - result->sql_types = sql_types; - result->column_names = names; - return std::move(result); - }; - function.copy_to_initialize_global = [](ClientContext &context, FunctionData &bind_data, - const string &file_path) -> unique_ptr { - auto &bind = bind_data.Cast(); - auto gstate = make_uniq(); - - auto column_names = std::vector(); - for (const auto &col_id : bind.column_names) { - column_names.push_back(col_id.c_str()); - } - - auto column_types = std::vector(); - for (auto &col_type : bind.sql_types) { - column_types.push_back(reinterpret_cast(&col_type)); - } - - auto dtype = DType::FromDuckDBTable(column_types, bind.column_nullable, column_names); - gstate->sink = ArrayStreamSink::Create(file_path, std::move(dtype)); - return std::move(gstate); - }; - function.copy_to_initialize_local = [](ExecutionContext &context, - FunctionData &bind_data) -> unique_ptr { - return std::move(make_uniq()); - }; - function.copy_to_sink = WriteSink; - function.copy_to_finalize = [](ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { - auto &global_state = gstate.Cast(); - global_state.sink->Close(); - }; - function.execution_mode = [](bool preserve_insertion_order, - bool supports_batch_index) -> CopyFunctionExecutionMode { - return CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE; - }; - function.extension = "vortex"; - - ExtensionUtil::RegisterFunction(instance, function); -} - -} // namespace vortex diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt deleted file mode 100644 index e4175aa..0000000 --- a/test/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories(${CMAKE_SOURCE_DIR}/../src/include) - -add_executable(vortex_tests src/vortex_expr_test.cpp) -target_include_directories(vortex_tests PRIVATE - ${catch2_SOURCE_DIR}/single_include -) - -target_link_libraries(vortex_tests PRIVATE - ${EXTENSION_NAME} - Catch2::Catch2WithMain) - -enable_testing() -add_test(NAME AllTests COMMAND vortex_tests) diff --git a/test/sql/table.test b/test/sql/table.test index b6f8f02..b7359fe 100644 --- a/test/sql/table.test +++ b/test/sql/table.test @@ -19,10 +19,10 @@ CREATE TABLE generated_data_table ( decimal_col DECIMAL(10, 2), varchar_col VARCHAR, date_col DATE, - timmestamp_s_col TIMESTAMP_S, - timmestamp_ms_col TIMESTAMP_MS, - timmestamp_us_col TIMESTAMP, - timmestamp_ns_col TIMESTAMP_NS, + timestamp_s_col TIMESTAMP_S, + timestamp_ms_col TIMESTAMP_MS, + timestamp_us_col TIMESTAMP, + timestamp_ns_col TIMESTAMP_NS, blob_col BLOB, ); @@ -42,10 +42,10 @@ SELECT CAST(seq AS DECIMAL(10, 2)) / 10.0 AS decimal_col, 'Value ' || seq AS varchar_col, DATE '1992-03-22' + to_days(cast(seq AS INTEGER)) AS date_col, - TIMESTAMP_S '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_s_col, - TIMESTAMP_MS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_ms_col, - TIMESTAMP '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_us_col, - TIMESTAMP_NS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_ns_col, + TIMESTAMP_S '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_s_col, + TIMESTAMP_MS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_ms_col, + TIMESTAMP '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_us_col, + TIMESTAMP_NS '1970-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_ns_col, CAST(md5(CAST(seq AS VARCHAR)) AS BLOB) AS blob_col, FROM generate_series(1, 100) AS t(seq); diff --git a/test/sql/table.test_slow b/test/sql/table.test_slow index c55bc83..4129742 100644 --- a/test/sql/table.test_slow +++ b/test/sql/table.test_slow @@ -19,10 +19,10 @@ CREATE TABLE generated_data_table ( decimal_col DECIMAL(10, 2), varchar_col VARCHAR, date_col DATE, - timmestamp_s_col TIMESTAMP_S, - timmestamp_ms_col TIMESTAMP_MS, - timmestamp_us_col TIMESTAMP, - timmestamp_ns_col TIMESTAMP_NS, + timestamp_s_col TIMESTAMP_S, + timestamp_ms_col TIMESTAMP_MS, + timestamp_us_col TIMESTAMP, + timestamp_ns_col TIMESTAMP_NS, blob_col BLOB, ); @@ -42,10 +42,10 @@ SELECT CAST(seq AS DECIMAL(10, 2)) / 10.0 AS decimal_col, 'Value ' || seq AS varchar_col, DATE '1992-03-22' + to_days(cast(seq AS INTEGER)) AS date_col, - TIMESTAMP_S '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_s_col, - TIMESTAMP_MS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_ms_col, - TIMESTAMP '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_us_col, - TIMESTAMP_NS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_ns_col, + TIMESTAMP_S '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_s_col, + TIMESTAMP_MS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_ms_col, + TIMESTAMP '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_us_col, + TIMESTAMP_NS '1970-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timestamp_ns_col, CAST(md5(CAST(seq AS VARCHAR)) AS BLOB) AS blob_col, FROM generate_series(1, 40000) AS t(seq); diff --git a/test/sql/timestamp.test b/test/sql/timestamp.test index 7a23379..44b4065 100644 --- a/test/sql/timestamp.test +++ b/test/sql/timestamp.test @@ -10,7 +10,6 @@ CREATE TABLE generated_data_table ( timmestamp_s_col TIMESTAMP_S, timmestamp_ms_col TIMESTAMP_MS, timmestamp_us_col TIMESTAMP, - timmestamp_ns_col TIMESTAMP_NS, ); statement ok @@ -20,7 +19,6 @@ SELECT TIMESTAMP_S '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_s_col, TIMESTAMP_MS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_ms_col, TIMESTAMP '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_us_col, - TIMESTAMP_NS '2025-01-01 00:00:00' + to_seconds(cast(seq AS INTEGER)) as timmestamp_ns_col, FROM generate_series(1, 100) AS t(seq); statement ok @@ -28,17 +26,15 @@ COPY generated_data_table TO '__TEST_DIR__/timestamp_test.vortex' (FORMAT VORTEX # Verify that all timestamp columns have the same type after round trip -query III +query II SELECT sum(timmestamp_s_col = timmestamp_ms_col) = count(id), sum(timmestamp_s_col = timmestamp_us_col) = count(id), - sum(timmestamp_s_col = timmestamp_ns_col) = count(id) FROM read_vortex('__TEST_DIR__/timestamp_test.vortex'); ---- 1 1 -1 # Verify that filtering works for ms query I @@ -50,14 +46,3 @@ WHERE timmestamp_ms_col > TIMESTAMP '2025-01-01 00:00:50' ---- 50 - -# Verify that filtering works for ns -query I -SELECT - count(id) -FROM - read_vortex('__TEST_DIR__/timestamp_test.vortex') -WHERE - timmestamp_ns_col > TIMESTAMP '2025-01-01 00:00:50' ----- -50 \ No newline at end of file diff --git a/test/src/vortex_expr_test.cpp b/test/src/vortex_expr_test.cpp deleted file mode 100644 index df67b34..0000000 --- a/test/src/vortex_expr_test.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include - -#include "catch2/catch_test_macros.hpp" -#include "duckdb.hpp" -#include "duckdb/planner/filter/constant_filter.hpp" - -#include "vortex_expr.hpp" - -TEST_CASE("Test DuckDB list handling", "[list]") { - google::protobuf::Arena arena; - - auto filter = duckdb::make_uniq(duckdb::ExpressionType::COMPARE_EQUAL, duckdb::Value(1)); - auto filter2 = duckdb::make_uniq(duckdb::ExpressionType::COMPARE_EQUAL, duckdb::Value(2)); - auto filter3 = duckdb::make_uniq(duckdb::ExpressionType::COMPARE_EQUAL, duckdb::Value(3)); - - auto filter_and = duckdb::ConjunctionAndFilter(); - - filter_and.child_filters.push_back(std::move(filter)); - filter_and.child_filters.push_back(std::move(filter2)); - filter_and.child_filters.push_back(std::move(filter3)); - - auto col = std::string("a"); - auto expr = vortex::table_expression_into_expr(arena, filter_and, col); - - REQUIRE(expr->children().size() == 2); - REQUIRE(expr->kind().binary_op() == vortex::expr::Kind_BinaryOp_And); - - REQUIRE(expr->children()[0].children().size() == 2); - REQUIRE(expr->children()[0].kind().binary_op() == vortex::expr::Kind_BinaryOp_Eq); -} diff --git a/vcpkg.json b/vcpkg.json index 61c33b9..4208a98 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,3 +1,3 @@ { - "dependencies": [{ "name": "catch2" }, { "name": "protobuf" }] + "dependencies": [] } diff --git a/vortex b/vortex index eb872e3..eec0e17 160000 --- a/vortex +++ b/vortex @@ -1 +1 @@ -Subproject commit eb872e3da140051b4929267b1b72b548cd3bb6a8 +Subproject commit eec0e179bb9c5e808bee6b7677d1cd3f73b8e78e