diff --git a/.github/workflows/arrow-flight-tests.yml b/.github/workflows/arrow-flight-tests.yml index 3aab27ff09c58..7418442e785b6 100644 --- a/.github/workflows/arrow-flight-tests.yml +++ b/.github/workflows/arrow-flight-tests.yml @@ -60,7 +60,7 @@ jobs: prestocpp-linux-build-for-test: runs-on: ubuntu-22.04 container: - image: prestodb/presto-native-dependency:0.292-20250204112033-cf8ba84 + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 concurrency: group: ${{ github.workflow }}-prestocpp-linux-build-for-test-${{ github.event.pull_request.number }} cancel-in-progress: true @@ -122,7 +122,7 @@ jobs: -DCMAKE_PREFIX_PATH=/usr/local \ -DThrift_ROOT=/usr/local \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DMAX_LINK_JOBS=4 + -DMAX_LINK_JOBS=3 ninja -C _build/release -j 4 - name: Ccache after @@ -154,7 +154,7 @@ jobs: needs: prestocpp-linux-build-for-test runs-on: ubuntu-22.04 container: - image: prestodb/presto-native-dependency:0.292-20250204112033-cf8ba84 + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 env: INSTALL_PREFIX: "${{ github.workspace }}/adapter-deps/install" strategy: diff --git a/.github/workflows/prestocpp-linux-adapters-build.yml b/.github/workflows/prestocpp-linux-adapters-build.yml index 362b3fcbbacee..546cf0f395e46 100644 --- a/.github/workflows/prestocpp-linux-adapters-build.yml +++ b/.github/workflows/prestocpp-linux-adapters-build.yml @@ -11,7 +11,7 @@ jobs: prestocpp-linux-adapters-build: runs-on: ubuntu-22.04 container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 concurrency: group: ${{ github.workflow }}-prestocpp-linux-adapters-build-${{ github.event.pull_request.number }} cancel-in-progress: true diff --git a/.github/workflows/prestocpp-linux-build-and-unit-test.yml b/.github/workflows/prestocpp-linux-build-and-unit-test.yml index e77a4bb152fd6..88852fce0285c 100644 --- a/.github/workflows/prestocpp-linux-build-and-unit-test.yml +++ b/.github/workflows/prestocpp-linux-build-and-unit-test.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-22.04 needs: changes container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 concurrency: group: ${{ github.workflow }}-prestocpp-linux-build-test-${{ github.event.pull_request.number }} cancel-in-progress: true @@ -135,7 +135,7 @@ jobs: needs: [changes, prestocpp-linux-build-for-test] runs-on: ubuntu-22.04 container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 volumes: - /usr:/host_usr - /opt:/host_opt @@ -260,7 +260,7 @@ jobs: storage-format: [PARQUET, DWRF] enable-sidecar: [true, false] container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 volumes: - /usr:/host_usr - /opt:/host_opt @@ -388,7 +388,7 @@ jobs: group: ${{ github.workflow }}-prestocpp-linux-presto-on-spark-e2e-tests-${{ matrix.storage-format }}-${{ matrix.enable-sidecar }}-${{ github.event.pull_request.number }} cancel-in-progress: true container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 volumes: - /usr:/host_usr - /opt:/host_opt @@ -501,7 +501,7 @@ jobs: needs: [changes, prestocpp-linux-build-for-test] runs-on: ubuntu-22.04 container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 volumes: - /usr:/host_usr - /opt:/host_opt @@ -613,7 +613,7 @@ jobs: needs: [changes, prestocpp-linux-build-for-test] runs-on: ubuntu-22.04 container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 volumes: - /usr:/host_usr - /opt:/host_opt diff --git a/.github/workflows/prestocpp-linux-build.yml b/.github/workflows/prestocpp-linux-build.yml index 872b43adf90e0..7ee1171e957f4 100644 --- a/.github/workflows/prestocpp-linux-build.yml +++ b/.github/workflows/prestocpp-linux-build.yml @@ -26,7 +26,7 @@ jobs: contents: read needs: changes container: - image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea + image: prestodb/presto-native-dependency:0.297-202601311423-22d722a9 volumes: - /usr:/host_usr - /opt:/host_opt diff --git a/.github/workflows/prestocpp-macos-build.yml b/.github/workflows/prestocpp-macos-build.yml index 7c2b6421a43ba..32bf241d58670 100644 --- a/.github/workflows/prestocpp-macos-build.yml +++ b/.github/workflows/prestocpp-macos-build.yml @@ -76,6 +76,10 @@ jobs: install_velox_deps_from_brew install_double_conversion + # Install glog/gflags because they are not installed from homebrew. + install_gflags + install_glog + # Velox deps needed by proxygen, a presto dependency. install_boost install_fmt diff --git a/presto-native-execution/CMakeLists.txt b/presto-native-execution/CMakeLists.txt index fc5d1ddf62696..97f3973145950 100644 --- a/presto-native-execution/CMakeLists.txt +++ b/presto-native-execution/CMakeLists.txt @@ -77,10 +77,8 @@ option(PRESTO_ENABLE_ARROW_FLIGHT_CONNECTOR "Enable Arrow Flight connector" OFF) option(PRESTO_ENABLE_SPATIAL "Enable spatial support" ON) -# Set all Velox options below and make sure that if we include folly headers or -# other dependency headers that include folly headers we turn off the coroutines -# and turn on int128. -add_compile_definitions(FOLLY_HAVE_INT128_T=1 FOLLY_CFG_NO_COROUTINES) +# Turn on folly int128 support. +add_compile_definitions(FOLLY_HAVE_INT128_T=1) set(VELOX_ENABLE_S3 ${PRESTO_ENABLE_S3} CACHE BOOL "Build S3 support") @@ -184,6 +182,9 @@ find_library(PROXYGEN_HTTP_SERVER proxygenhttpserver) find_library(FIZZ fizz) find_library(WANGLE wangle) find_library(MVFST_EXCEPTION mvfst_exception) +find_library(MVFST_FOLLY_UTILS mvfst_folly_utils) +find_library(MVFST_CODEC_TYPES mvfst_codec_types) +find_library(MVFST_CONTIGUOUS_CURSOR mvfst_contiguous_cursor) find_library(RE2 re2) @@ -199,6 +200,9 @@ set( ${WANGLE} ${FIZZ} ${MVFST_EXCEPTION} + ${MVFST_FOLLY_UTILS} + ${MVFST_CODEC_TYPES} + ${MVFST_CONTIGUOUS_CURSOR} ) find_path(PROXYGEN_DIR NAMES include/proxygen) set(PROXYGEN_INCLUDE_DIR "${PROXYGEN_DIR}/include/") diff --git a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp index b6ad329f0e5ae..2ad4e21b3264f 100644 --- a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp +++ b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.cpp @@ -150,6 +150,22 @@ toVeloxIcebergPartitionSpec( spec.specId, fields); } +velox::parquet::ParquetFieldId toParquetField( + const protocol::iceberg::ColumnIdentity& column) { + std::vector children; + if (!column.children.empty()) { + children.reserve(column.children.size()); + for (const auto& child : column.children) { + children.push_back(toParquetField(child)); + } + } + // ParquetFieldId does not declare a constructor that takes fieldId and + // children, so we use aggregate initialization to make it work for compilers + // that don't create the necessary constructors by default (e.g clang-15). + velox::parquet::ParquetFieldId pf{.fieldId = column.id, .children = children}; + return pf; +} + } // namespace std::unique_ptr @@ -234,13 +250,13 @@ IcebergPrestoToVeloxConnector::toVeloxColumnHandle( columnParseParameters.partitionDateValueFormat = velox::connector::hive:: HiveColumnHandle::ColumnParseParameters::kDaysSinceEpoch; } - return std::make_unique( + + return std::make_unique( icebergColumn->columnIdentity.name, toHiveColumnType(icebergColumn->columnType), type, - type, - toRequiredSubfields(icebergColumn->requiredSubfields), - columnParseParameters); + toParquetField(icebergColumn->columnIdentity), + toRequiredSubfields(icebergColumn->requiredSubfields)); } std::unique_ptr @@ -324,7 +340,7 @@ IcebergPrestoToVeloxConnector::toVeloxInsertTableHandle( createHandle->handle.connectorHandle->_type); const auto inputColumns = - toHiveColumns(icebergOutputTableHandle->inputColumns, typeParser); + toIcebergColumns(icebergOutputTableHandle->inputColumns, typeParser); return std::make_unique< velox::connector::hive::iceberg::IcebergInsertTableHandle>( @@ -354,7 +370,7 @@ IcebergPrestoToVeloxConnector::toVeloxInsertTableHandle( insertHandle->handle.connectorHandle->_type); const auto inputColumns = - toHiveColumns(icebergInsertTableHandle->inputColumns, typeParser); + toIcebergColumns(icebergInsertTableHandle->inputColumns, typeParser); return std::make_unique< velox::connector::hive::iceberg::IcebergInsertTableHandle>( @@ -370,18 +386,20 @@ IcebergPrestoToVeloxConnector::toVeloxInsertTableHandle( toFileCompressionKind(icebergInsertTableHandle->compressionCodec))); } -std::vector -IcebergPrestoToVeloxConnector::toHiveColumns( +std::vector +IcebergPrestoToVeloxConnector::toIcebergColumns( const protocol::List& inputColumns, const TypeParser& typeParser) const { - std::vector hiveColumns; - hiveColumns.reserve(inputColumns.size()); + std::vector + icebergColumns; + icebergColumns.reserve(inputColumns.size()); for (const auto& columnHandle : inputColumns) { - hiveColumns.emplace_back( - std::dynamic_pointer_cast( + icebergColumns.emplace_back( + std::dynamic_pointer_cast< + velox::connector::hive::iceberg::IcebergColumnHandle>( std::shared_ptr(toVeloxColumnHandle(&columnHandle, typeParser)))); } - return hiveColumns; + return icebergColumns; } } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h index c9336ba6c9bc4..7d9c3cac00fbb 100644 --- a/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h +++ b/presto-native-execution/presto_cpp/main/connectors/IcebergPrestoToVeloxConnector.h @@ -17,6 +17,8 @@ #include "presto_cpp/main/connectors/PrestoToVeloxConnector.h" #include "presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h" +#include "velox/connectors/hive/iceberg/IcebergColumnHandle.h" + namespace facebook::presto { class IcebergPrestoToVeloxConnector final : public PrestoToVeloxConnector { @@ -52,7 +54,8 @@ class IcebergPrestoToVeloxConnector final : public PrestoToVeloxConnector { const TypeParser& typeParser) const final; private: - std::vector toHiveColumns( + std::vector + toIcebergColumns( const protocol::List& inputColumns, const TypeParser& typeParser) const; diff --git a/presto-native-execution/presto_cpp/main/thrift/CMakeLists.txt b/presto-native-execution/presto_cpp/main/thrift/CMakeLists.txt index 9deb725d94d06..a551a8f2aea72 100644 --- a/presto-native-execution/presto_cpp/main/thrift/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/thrift/CMakeLists.txt @@ -16,8 +16,30 @@ find_library(THRIFT_CORE thrift-core) find_library(THRIFT_PROTOCOL thriftprotocol) find_library(THRIFT_METADATA thriftmetadata) find_library(THRIFT_TRANSPORT transport) +find_library(THRIFT_ASYNC async) +find_library(THRIFT_RPC_METADATA rpcmetadata) +find_library(THRIFT_TYPEREP thrifttyperep) + find_path(THRIFT_INCLUDES thrift/lib/cpp2/gen/module_data_h.h PATH_SUFFIXES include REQUIRED) +# In the CI /opt/homebrew/include is not included and it leads to missing +# event.h. +if(APPLE AND EXISTS "/opt/homebrew") + include_directories(SYSTEM /opt/homebrew/include) +endif() + +set( + presto_thrift_library_dependencies + ${THRIFT_PROTOCOL} + ${THRIFT_METADATA} + ${THRIFT_CORE} + ${THRIFT_TYPEREP} + ${THRIFT_ASYNC} + ${THRIFT_TRANSPORT} + ${THRIFT_RPC_METADATA} + ${FOLLY_WITH_DEPENDENCIES} +) + include(ThriftLibrary.cmake) thrift_library( @@ -29,13 +51,7 @@ thrift_library( ${CMAKE_CURRENT_BINARY_DIR}/presto_cpp/main/thrift ".." ) -target_link_libraries( - presto_thrift-cpp2 - ${THRIFT_PROTOCOL} - ${THRIFT_METADATA} - ${THRIFT_CORE} - ${THRIFT_TRANSPORT} -) +target_link_libraries(presto_thrift-cpp2 ${presto_thrift_library_dependencies}) set(presto_thrift_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(presto_thrift-cpp2 PUBLIC ${presto_thrift_INCLUDES} ${GLOG_INCLUDE_DIR}) target_include_directories(presto_thrift-cpp2-obj PUBLIC ${THRIFT_INCLUDES} ${GLOG_INCLUDE_DIR}) @@ -49,13 +65,7 @@ thrift_library( ${CMAKE_CURRENT_BINARY_DIR}/presto_cpp/main/thrift ".." ) -target_link_libraries( - presto_native-cpp2 - ${THRIFT_PROTOCOL} - ${THRIFT_METADATA} - ${THRIFT_CORE} - ${THRIFT_TRANSPORT} -) +target_link_libraries(presto_native-cpp2 ${presto_thrift_library_dependencies}) set(presto_native_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(presto_native-cpp2 PUBLIC ${presto_native_INCLUDES} ${GLOG_INCLUDE_DIR}) target_include_directories(presto_native-cpp2-obj PUBLIC ${THRIFT_INCLUDES} ${GLOG_INCLUDE_DIR}) diff --git a/presto-native-execution/presto_cpp/main/thrift/ThriftLibrary.cmake b/presto-native-execution/presto_cpp/main/thrift/ThriftLibrary.cmake index e63a7556d2cf6..fb547388c8b49 100644 --- a/presto-native-execution/presto_cpp/main/thrift/ThriftLibrary.cmake +++ b/presto-native-execution/presto_cpp/main/thrift/ThriftLibrary.cmake @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -117,7 +117,7 @@ endmacro() # #include_prefix # ) # add_library(somelib ...) -# target_link_libraries(somelibe ${file_name}-${language} ...) +# target_link_libraries(somelib ${file_name}-${language} ...) # macro(thrift_library @@ -167,15 +167,21 @@ endmacro() # # thrift_generate # This is used to codegen thrift files using the thrift compiler +# Supports library names that differ from the file name (to handle two libraries +# with the same filename on disk (in different folders)) # Params: -# @file_name - The name of tge thrift file +# @file_name - Input file name. Will be used for naming the CMake +# target if TARGET_NAME_BASE is not specified. # @services - A list of services that are declared in the thrift file # @language - The generator to use (cpp, cpp2 or py3) # @options - Extra options to pass to the generator # @output_path - The directory where the thrift file lives +# @include_prefix - Prefix to use for thrift includes in generated sources +# @TARGET_NAME_BASE (optional) - name used for target instead of real filename +# @THRIFT_INCLUDE_DIRECTORIES (optional) path to thrift include directories # # Output: -# file-language-target - A custom target to add a dependenct +# file-language-target - A custom target to add a dependency # ${file-language-HEADERS} - The generated Header Files. # ${file-language-SOURCES} - The generated Source Files. # @@ -186,7 +192,6 @@ endmacro() # bypass_source_check(${file_language-SOURCES}) # This will prevent cmake from complaining about missing source files # - macro(thrift_generate file_name services @@ -198,43 +203,59 @@ macro(thrift_generate ) cmake_parse_arguments(THRIFT_GENERATE # Prefix "" # Options - "" # One Value args + "TARGET_NAME_BASE" # One Value args "THRIFT_INCLUDE_DIRECTORIES" # Multi-value args "${ARGN}") + set(source_file_name ${file_name}) + set(target_file_name ${file_name}) set(thrift_include_directories) foreach(dir ${THRIFT_GENERATE_THRIFT_INCLUDE_DIRECTORIES}) list(APPEND thrift_include_directories "-I" "${dir}") endforeach() + if(DEFINED THRIFT_GENERATE_TARGET_NAME_BASE + AND NOT THRIFT_GENERATE_TARGET_NAME_BASE STREQUAL "") + set(target_file_name ${THRIFT_GENERATE_TARGET_NAME_BASE}) + endif() - set("${file_name}-${language}-HEADERS" - ${output_path}/gen-${language}/${file_name}_constants.h - ${output_path}/gen-${language}/${file_name}_data.h - ${output_path}/gen-${language}/${file_name}_metadata.h - ${output_path}/gen-${language}/${file_name}_types.h - ${output_path}/gen-${language}/${file_name}_types.tcc + set("${target_file_name}-${language}-HEADERS" + ${output_path}/gen-${language}/${source_file_name}_constants.h + ${output_path}/gen-${language}/${source_file_name}_data.h + ${output_path}/gen-${language}/${source_file_name}_metadata.h + ${output_path}/gen-${language}/${source_file_name}_types.h + ${output_path}/gen-${language}/${source_file_name}_types.tcch + ${output_path}/gen-${language}/${source_file_name}_types_custom_protocol.h ) - set("${file_name}-${language}-SOURCES" - ${output_path}/gen-${language}/${file_name}_constants.cpp - ${output_path}/gen-${language}/${file_name}_data.cpp - ${output_path}/gen-${language}/${file_name}_types.cpp + set("${target_file_name}-${language}-SOURCES" + ${output_path}/gen-${language}/${source_file_name}_constants.cpp + ${output_path}/gen-${language}/${source_file_name}_data.cpp + ${output_path}/gen-${language}/${source_file_name}_types.cpp + ${output_path}/gen-${language}/${source_file_name}_types_binary.cpp + ${output_path}/gen-${language}/${source_file_name}_types_compact.cpp + ${output_path}/gen-${language}/${source_file_name}_types_serialization.cpp ) + if("${options}" MATCHES "layouts") + set("${target_file_name}-${language}-SOURCES" + ${${target_file_name}-${language}-SOURCES} + ${output_path}/gen-${language}/${source_file_name}_layouts.cpp + ) + endif() if(NOT "${options}" MATCHES "no_metadata") - set("${file_name}-${language}-SOURCES" - ${${file_name}-${language}-SOURCES} - ${output_path}/gen-${language}/${file_name}_metadata.cpp + set("${target_file_name}-${language}-SOURCES" + ${${target_file_name}-${language}-SOURCES} + ${output_path}/gen-${language}/${source_file_name}_metadata.cpp ) endif() foreach(service ${services}) - set("${file_name}-${language}-HEADERS" - ${${file_name}-${language}-HEADERS} + set("${target_file_name}-${language}-HEADERS" + ${${source_file_name}-${language}-HEADERS} ${output_path}/gen-${language}/${service}.h ${output_path}/gen-${language}/${service}.tcc ${output_path}/gen-${language}/${service}AsyncClient.h ${output_path}/gen-${language}/${service}_custom_protocol.h ) - set("${file_name}-${language}-SOURCES" - ${${file_name}-${language}-SOURCES} + set("${target_file_name}-${language}-SOURCES" + ${${source_file_name}-${language}-SOURCES} ${output_path}/gen-${language}/${service}.cpp ${output_path}/gen-${language}/${service}AsyncClient.cpp ) @@ -252,26 +273,27 @@ macro(thrift_generate set(gen_language "mstch_cpp2") elseif("${language}" STREQUAL "py3") set(gen_language "mstch_py3") - file(WRITE "${output_path}/gen-${language}/${file_name}/__init__.py") + file(WRITE "${output_path}/gen-${language}/${source_file_name}/__init__.py") endif() + message(STATUS "Thrift command:") + message(STATUS "${THRIFT1} -v --gen \"${gen_language}:${options}${include_prefix_text}\" -o ${output_path} ${thrift_include_directories} \"${file_path}/${source_file_name}.thrift\"") add_custom_command( - OUTPUT ${${file_name}-${language}-HEADERS} - ${${file_name}-${language}-SOURCES} - COMMAND mkdir -p ${output_path} + OUTPUT ${${target_file_name}-${language}-HEADERS} + ${${target_file_name}-${language}-SOURCES} COMMAND ${THRIFT1} --gen "${gen_language}:${options}${include_prefix_text}" -o ${output_path} ${thrift_include_directories} - "${file_path}/${file_name}.thrift" + "${file_path}/${source_file_name}.thrift" DEPENDS ${THRIFT1} - "${file_path}/${file_name}.thrift" - COMMENT "Generating ${file_name} files. Output: ${output_path}" + "${file_path}/${source_file_name}.thrift" + COMMENT "Generating ${target_file_name} files. Output: ${output_path}. Command: ${THRIFT1} -v --gen \"${gen_language}:${options}${include_prefix_text}\" -o ${output_path} ${thrift_include_directories} \"${file_path}/${source_file_name}.thrift\"" ) add_custom_target( - ${file_name}-${language}-target ALL + ${target_file_name}-${language}-target ALL DEPENDS ${${language}-${language}-HEADERS} - ${${file_name}-${language}-SOURCES} + ${${target_file_name}-${language}-SOURCES} ) install( DIRECTORY gen-${language} diff --git a/presto-native-execution/presto_cpp/main/thrift/tests/CMakeLists.txt b/presto-native-execution/presto_cpp/main/thrift/tests/CMakeLists.txt index f75598ff1d4d7..2927edf96d82b 100644 --- a/presto-native-execution/presto_cpp/main/thrift/tests/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/thrift/tests/CMakeLists.txt @@ -24,12 +24,6 @@ target_link_libraries( presto_thrift_extra presto_thrift-cpp2 presto_native-cpp2 - ${THRIFTCPP2} - ${THRIFT_PROTOCOL} - ${THRIFT_METADATA} - ${THRIFT_CORE} - ${THRIFT_TRANSPORT} - ${FOLLY_WITH_DEPENDENCIES} ${RE2} GTest::gmock GTest::gtest diff --git a/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp b/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp index 6f3725faadfb3..69458fa279eb5 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp +++ b/presto-native-execution/presto_cpp/main/types/tests/PrestoToVeloxConnectorTest.cpp @@ -21,6 +21,7 @@ #include "velox/common/base/tests/GTestUtils.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/TableHandle.h" +#include "velox/connectors/hive/iceberg/IcebergColumnHandle.h" using namespace facebook::presto; using namespace facebook::velox; @@ -185,3 +186,110 @@ TEST_F(PrestoToVeloxConnectorTest, hiveLowercasesColumnNames) { EXPECT_EQ(dataColumnsType->nameOf(0), "mixedcasecol1"); EXPECT_EQ(dataColumnsType->nameOf(1), "uppercasecol2"); } + +namespace { + +protocol::iceberg::IcebergColumnHandle createIcebergColumnHandle( + const std::string& name, + int32_t fieldId, + const std::string& type, + protocol::iceberg::TypeCategory typeCategory = + protocol::iceberg::TypeCategory::PRIMITIVE, + const std::vector& children = {}) { + protocol::iceberg::IcebergColumnHandle column; + column.columnIdentity.name = name; + column.columnIdentity.id = fieldId; + column.columnIdentity.typeCategory = typeCategory; + column.columnIdentity.children = children; + column.type = type; + column.columnType = protocol::hive::ColumnType::REGULAR; + return column; +} + +} // namespace + +TEST_F(PrestoToVeloxConnectorTest, icebergColumnHandleSimple) { + auto icebergColumn = createIcebergColumnHandle("col1", 1, "integer"); + + IcebergPrestoToVeloxConnector icebergConnector("iceberg"); + auto handle = + icebergConnector.toVeloxColumnHandle(&icebergColumn, *typeParser_); + auto* icebergHandle = + dynamic_cast( + handle.get()); + ASSERT_NE(icebergHandle, nullptr); + + EXPECT_EQ(icebergHandle->name(), "col1"); + EXPECT_EQ(icebergHandle->dataType()->kind(), TypeKind::INTEGER); + EXPECT_EQ(icebergHandle->field().fieldId, 1); + EXPECT_TRUE(icebergHandle->field().children.empty()); +} + +TEST_F(PrestoToVeloxConnectorTest, icebergColumnHandleNested) { + protocol::iceberg::ColumnIdentity child1; + child1.name = "child1"; + child1.id = 2; + child1.typeCategory = protocol::iceberg::TypeCategory::PRIMITIVE; + + protocol::iceberg::ColumnIdentity child2; + child2.name = "child2"; + child2.id = 3; + child2.typeCategory = protocol::iceberg::TypeCategory::PRIMITIVE; + + auto icebergColumn = createIcebergColumnHandle( + "struct_col", + 1, + "row(child1 integer, child2 varchar)", + protocol::iceberg::TypeCategory::STRUCT, + {child1, child2}); + + IcebergPrestoToVeloxConnector icebergConnector("iceberg"); + auto handle = + icebergConnector.toVeloxColumnHandle(&icebergColumn, *typeParser_); + auto* icebergHandle = + dynamic_cast( + handle.get()); + ASSERT_NE(icebergHandle, nullptr); + + EXPECT_EQ(icebergHandle->name(), "struct_col"); + EXPECT_EQ(icebergHandle->dataType()->kind(), TypeKind::ROW); + EXPECT_EQ(icebergHandle->field().fieldId, 1); + ASSERT_EQ(icebergHandle->field().children.size(), 2); + EXPECT_EQ(icebergHandle->field().children[0].fieldId, 2); + EXPECT_EQ(icebergHandle->field().children[1].fieldId, 3); +} + +TEST_F(PrestoToVeloxConnectorTest, icebergColumnHandleDeeplyNested) { + protocol::iceberg::ColumnIdentity inner; + inner.name = "inner"; + inner.id = 3; + inner.typeCategory = protocol::iceberg::TypeCategory::PRIMITIVE; + + protocol::iceberg::ColumnIdentity middle; + middle.name = "middle"; + middle.id = 2; + middle.typeCategory = protocol::iceberg::TypeCategory::STRUCT; + middle.children = {inner}; + + auto icebergColumn = createIcebergColumnHandle( + "outer", + 1, + "row(middle row(inner bigint))", + protocol::iceberg::TypeCategory::STRUCT, + {middle}); + + IcebergPrestoToVeloxConnector icebergConnector("iceberg"); + auto handle = + icebergConnector.toVeloxColumnHandle(&icebergColumn, *typeParser_); + auto* icebergHandle = + dynamic_cast( + handle.get()); + ASSERT_NE(icebergHandle, nullptr); + + EXPECT_EQ(icebergHandle->name(), "outer"); + EXPECT_EQ(icebergHandle->field().fieldId, 1); + ASSERT_EQ(icebergHandle->field().children.size(), 1); + EXPECT_EQ(icebergHandle->field().children[0].fieldId, 2); + ASSERT_EQ(icebergHandle->field().children[0].children.size(), 1); + EXPECT_EQ(icebergHandle->field().children[0].children[0].fieldId, 3); +} diff --git a/presto-native-execution/scripts/setup-centos.sh b/presto-native-execution/scripts/setup-centos.sh index 4e246ab33b6aa..f6226ff2d9cd7 100755 --- a/presto-native-execution/scripts/setup-centos.sh +++ b/presto-native-execution/scripts/setup-centos.sh @@ -32,7 +32,8 @@ fi export NPROC=${NPROC:-$(getconf _NPROCESSORS_ONLN)} function install_presto_deps_from_package_managers { - dnf install -y maven java clang-tools-extra jq perl-XML-XPath + # proxygen requires c-ares-devel + dnf install -y maven java clang-tools-extra jq perl-XML-XPath c-ares-devel # This python version is installed by the Velox setup scripts pip install regex pyyaml chevron black ptsd-jbroll } @@ -54,9 +55,6 @@ function install_gperf { function install_proxygen { wget_and_untar https://github.com/facebook/proxygen/archive/refs/tags/${FB_OS_VERSION}.tar.gz proxygen - # Folly Portability.h being used to decide whether or not support coroutines - # causes issues (build, lin) if the selection is not consistent across users of folly. - EXTRA_PKG_CXXFLAGS=" -DFOLLY_CFG_NO_COROUTINES" cmake_install_dir proxygen -DBUILD_TESTS=OFF -DBUILD_SHARED_LIBS=ON } diff --git a/presto-native-execution/scripts/setup-macos.sh b/presto-native-execution/scripts/setup-macos.sh index e773d4210438c..b3a48c133b2a7 100755 --- a/presto-native-execution/scripts/setup-macos.sh +++ b/presto-native-execution/scripts/setup-macos.sh @@ -28,9 +28,6 @@ DATASKETCHES_VERSION="5.2.0" function install_proxygen { wget_and_untar https://github.com/facebook/proxygen/archive/refs/tags/${FB_OS_VERSION}.tar.gz proxygen - # Folly Portability.h being used to decide whether or not support coroutines - # causes issues (build, lin) if the selection is not consistent across users of folly. - EXTRA_PKG_CXXFLAGS=" -DFOLLY_CFG_NO_COROUTINES" cmake_install_dir proxygen -DBUILD_TESTS=OFF } diff --git a/presto-native-execution/scripts/setup-ubuntu.sh b/presto-native-execution/scripts/setup-ubuntu.sh index 70c172b50a0c7..5332933908b48 100755 --- a/presto-native-execution/scripts/setup-ubuntu.sh +++ b/presto-native-execution/scripts/setup-ubuntu.sh @@ -22,13 +22,10 @@ SUDO="${SUDO:-"sudo --preserve-env"}" DATASKETCHES_VERSION="5.2.0" function install_proxygen { - # proxygen requires python and gperf + # proxygen requires python, gperf, and libc-ares-dev ${SUDO} apt update - ${SUDO} apt install -y gperf python3 + ${SUDO} apt install -y gperf python3 libc-ares-dev wget_and_untar https://github.com/facebook/proxygen/archive/refs/tags/${FB_OS_VERSION}.tar.gz proxygen - # Folly Portability.h being used to decide whether or not support coroutines - # causes issues (build, lin) if the selection is not consistent across users of folly. - EXTRA_PKG_CXXFLAGS=" -DFOLLY_CFG_NO_COROUTINES" cmake_install_dir proxygen -DBUILD_TESTS=OFF } diff --git a/presto-native-execution/velox b/presto-native-execution/velox index 59b492a9bce45..a25666b541b72 160000 --- a/presto-native-execution/velox +++ b/presto-native-execution/velox @@ -1 +1 @@ -Subproject commit 59b492a9bce45f487f24b5cbae7dc845ea3d0827 +Subproject commit a25666b541b72f8f05de14c8b85ea9018237b393