From 3f384c8948efc45b3664ec33d665669c8d0154f2 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Sat, 8 Feb 2025 21:58:45 +0800 Subject: [PATCH 01/11] WIP: explore nanoarrow and sparrow --- .github/workflows/test.yml | 12 +++- .../IcebergThirdpartyToolchain.cmake | 56 +++++++++++++++++++ src/iceberg/CMakeLists.txt | 20 ++++++- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 502941462..184362228 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,11 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 + - name: Install Date Library + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y libdate-dev - name: Build Iceberg shell: bash run: ci/scripts/build_iceberg.sh $(pwd) @@ -65,6 +70,10 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 + - name: Install Date Library + shell: bash + run: | + brew install howard-hinnant-date - name: Build Iceberg shell: bash run: ci/scripts/build_iceberg.sh $(pwd) @@ -80,10 +89,11 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - - name: Install ZLIB + - name: Install ZLIB and Date Library shell: cmd run: | vcpkg install zlib:x64-windows + vcpkg install date:x64-windows - name: Build Iceberg shell: cmd run: | diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 0f95ea429..55aa7f5b3 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -198,3 +198,59 @@ endfunction() if(ICEBERG_AVRO) resolve_avro_dependency() endif() + +# ---------------------------------------------------------------------- +# Nanoarrow + +# It is also possible to vendor nanoarrow using the bundled source code. +function(resolve_nanoarrow_dependency) + prepare_fetchcontent() + fetchcontent_declare(nanoarrow + ${FC_DECLARE_COMMON_OPTIONS} + URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz" + ) + fetchcontent_makeavailable(nanoarrow) + + set_target_properties(nanoarrow PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow") + install(TARGETS nanoarrow + EXPORT iceberg_targets + RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}" + ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}") +endfunction() + +resolve_nanoarrow_dependency() + +# ---------------------------------------------------------------------- +# Sparrow + +function(resolve_sparrow_dependency) + prepare_fetchcontent() + + # Add the sparrow cmake module path to the CMAKE_MODULE_PATH + # Otherwise we will see error below: + # include could not find requested file: sanitizers + list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/sparrow-src/cmake) + + fetchcontent_declare(sparrow + ${FC_DECLARE_COMMON_OPTIONS} + GIT_REPOSITORY https://github.com/man-group/sparrow.git + GIT_TAG b0794cace22a55c32e90c0236034e040b822b957 # 0.3.0 + ) + fetchcontent_makeavailable(sparrow) + + set_target_properties(sparrow PROPERTIES OUTPUT_NAME "iceberg_vendored_sparrow") + install(TARGETS sparrow + EXPORT iceberg_targets + RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}" + ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}") + + # sparrow depends on date::date and date::date-tz + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES date) + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) +endfunction() + +resolve_sparrow_dependency() diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 8411c7ac5..964410751 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -17,11 +17,29 @@ set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc) +set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS) +set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS) +set(ICEBERG_STATIC_INSTALL_INTERFACE_LIBS) +set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS) + +list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) +list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) +list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") +list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") + add_iceberg_lib(iceberg SOURCES ${ICEBERG_SOURCES} PRIVATE_INCLUDES - ${ICEBERG_INCLUDES}) + ${ICEBERG_INCLUDES} + SHARED_LINK_LIBS + ${ICEBERG_SHARED_BUILD_INTERFACE_LIBS} + STATIC_LINK_LIBS + ${ICEBERG_STATIC_BUILD_INTERFACE_LIBS} + STATIC_INSTALL_INTERFACE_LIBS + ${ICEBERG_STATIC_INSTALL_INTERFACE_LIBS} + SHARED_INSTALL_INTERFACE_LIBS + ${ICEBERG_SHARED_INSTALL_INTERFACE_LIBS}) iceberg_install_all_headers(iceberg) From b6d778bd8345d6f891a0472b096d7861e6b2df3e Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Sun, 9 Feb 2025 13:46:46 +0800 Subject: [PATCH 02/11] fix date lib on Ubuntu CI --- .github/workflows/test.yml | 2 +- cmake_modules/IcebergThirdpartyToolchain.cmake | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 184362228..0ffcbc606 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,7 +52,7 @@ jobs: shell: bash run: | sudo apt-get update - sudo apt-get install -y libdate-dev + sudo apt-get install -y libdate-tz3 libhowardhinnant-date-dev - name: Build Iceberg shell: bash run: ci/scripts/build_iceberg.sh $(pwd) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 55aa7f5b3..8ace31574 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -246,7 +246,8 @@ function(resolve_sparrow_dependency) ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}" LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}") - # sparrow depends on date::date and date::date-tz + # sparrow depends on date::date and date::date-tz. It is tricky to use FetchContent + # to vendor date library since sparrow links date::date and date::date-tz directly. list(APPEND ICEBERG_SYSTEM_DEPENDENCIES date) set(ICEBERG_SYSTEM_DEPENDENCIES ${ICEBERG_SYSTEM_DEPENDENCIES} From 7ea03ee0ada656de72e2d0cd9abf1c7efa177b51 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Sun, 9 Feb 2025 13:55:14 +0800 Subject: [PATCH 03/11] fix linter to install date lib --- .github/workflows/cpp-linter.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index da58dca94..ed7b1bbc3 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -30,7 +30,15 @@ jobs: cpp-linter: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - name: Checkout iceberg-cpp + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + - name: Install Date Library + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y libdate-tz3 libhowardhinnant-date-dev - name: Run build run: | mkdir build && cd build From 59c7dae42c774d7d2826ea148ec09f5b00e3242b Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 10 Feb 2025 00:27:47 +0800 Subject: [PATCH 04/11] try to fix windows ci --- cmake_modules/IcebergThirdpartyToolchain.cmake | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8ace31574..caa9749ff 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -205,6 +205,11 @@ endif() # It is also possible to vendor nanoarrow using the bundled source code. function(resolve_nanoarrow_dependency) prepare_fetchcontent() + + set(NANOARROW_NAMESPACE + "iceberg" + CACHE STRING "" FORCE) + fetchcontent_declare(nanoarrow ${FC_DECLARE_COMMON_OPTIONS} URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz" @@ -232,6 +237,11 @@ function(resolve_sparrow_dependency) # include could not find requested file: sanitizers list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/sparrow-src/cmake) + if(MSVC_TOOLCHAIN) + # MSVC does not support int128_t + set(USE_LARGE_INT_PLACEHOLDERS ON) + endif() + fetchcontent_declare(sparrow ${FC_DECLARE_COMMON_OPTIONS} GIT_REPOSITORY https://github.com/man-group/sparrow.git @@ -240,6 +250,10 @@ function(resolve_sparrow_dependency) fetchcontent_makeavailable(sparrow) set_target_properties(sparrow PROPERTIES OUTPUT_NAME "iceberg_vendored_sparrow") + target_compile_definitions(sparrow INTERFACE SPARROW_USE_DATE_POLYFILL) + if(MSVC_TOOLCHAIN) + target_compile_definitions(sparrow INTERFACE SPARROW_USE_LARGE_INT_PLACEHOLDERS) + endif() install(TARGETS sparrow EXPORT iceberg_targets RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}" From 30abeacdcd408337fac998fd938852fe33d8202d Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 10 Feb 2025 10:00:38 +0800 Subject: [PATCH 05/11] try to fix macos ci --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ffcbc606..bed489440 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -60,8 +60,8 @@ jobs: shell: bash run: ci/scripts/build_example.sh $(pwd)/example macos: - name: AArch64 macOS 14 - runs-on: macos-14 + name: AArch64 macOS 15 + runs-on: macos-15 timeout-minutes: 30 strategy: fail-fast: false From ac025e182c350a6252aed62daba588d21cd3dc19 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Wed, 19 Mar 2025 17:44:46 +0800 Subject: [PATCH 06/11] add example code and test --- .../IcebergThirdpartyToolchain.cmake | 8 +- src/iceberg/CMakeLists.txt | 7 +- src/iceberg/arrow_c_data.h | 71 ++++++++++++++ src/iceberg/arrow_c_data_internal.cc | 94 +++++++++++++++++++ src/iceberg/arrow_c_data_internal.h | 41 ++++++++ test/CMakeLists.txt | 2 + test/arrow/CMakeLists.txt | 25 +++++ test/arrow/arrow_test.cc | 74 +++++++++++++++ test/avro/CMakeLists.txt | 24 +++++ .../avro_unittest.cc => avro/avro_test.cc} | 0 test/core/CMakeLists.txt | 8 -- 11 files changed, 343 insertions(+), 11 deletions(-) create mode 100644 src/iceberg/arrow_c_data.h create mode 100644 src/iceberg/arrow_c_data_internal.cc create mode 100644 src/iceberg/arrow_c_data_internal.h create mode 100644 test/arrow/CMakeLists.txt create mode 100644 test/arrow/arrow_test.cc create mode 100644 test/avro/CMakeLists.txt rename test/{core/avro_unittest.cc => avro/avro_test.cc} (100%) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index caa9749ff..80ac38ac1 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -65,6 +65,10 @@ function(resolve_arrow_dependency) set(ARROW_BUILD_STATIC ON CACHE BOOL "" FORCE) + # To workaround https://github.com/apache/arrow/pull/45513 + set(ARROW_IPC + ON + CACHE BOOL "" FORCE) set(ARROW_FILESYSTEM OFF CACHE BOOL "" FORCE) @@ -207,7 +211,7 @@ function(resolve_nanoarrow_dependency) prepare_fetchcontent() set(NANOARROW_NAMESPACE - "iceberg" + "Iceberg" CACHE STRING "" FORCE) fetchcontent_declare(nanoarrow @@ -245,7 +249,7 @@ function(resolve_sparrow_dependency) fetchcontent_declare(sparrow ${FC_DECLARE_COMMON_OPTIONS} GIT_REPOSITORY https://github.com/man-group/sparrow.git - GIT_TAG b0794cace22a55c32e90c0236034e040b822b957 # 0.3.0 + GIT_TAG f2fdcadc07538d558f128e887257e2ac19610adf # 0.5.0 ) fetchcontent_makeavailable(sparrow) diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 964410751..069244566 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -15,7 +15,12 @@ # specific language governing permissions and limitations # under the License. -set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc) +set(ICEBERG_SOURCES + arrow_c_data_internal.cc + demo_table.cc + schema.cc + schema_field.cc + type.cc) set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS) diff --git a/src/iceberg/arrow_c_data.h b/src/iceberg/arrow_c_data.h new file mode 100644 index 000000000..43c2adbd8 --- /dev/null +++ b/src/iceberg/arrow_c_data.h @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/arrow_c_data.h +/// Arrow C data interface +/// +/// The Arrow C Data interface (https://arrow.apache.org/docs/format/CDataInterface.html) +/// is part of the Arrow Columnar Format specification +/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for +/// documentation of these structures. + +#include + +#ifndef ARROW_C_DATA_INTERFACE +# define ARROW_C_DATA_INTERFACE + +extern "C" { +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +} // extern "C" + +#endif // ARROW_C_DATA_INTERFACE diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc new file mode 100644 index 000000000..3e5b77223 --- /dev/null +++ b/src/iceberg/arrow_c_data_internal.cc @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/arrow_c_data_internal.h" + +#include + +namespace iceberg::internal { + +std::pair CreateExampleArrowSchemaAndArrayByNanoarrow() { + ArrowSchema out_schema; + + // Initializes the root struct schema + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(&out_schema, NANOARROW_TYPE_STRUCT)); + NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(&out_schema, 2)); + + // Set up the non-nullable int64 field + struct ArrowSchema* int64_field = out_schema.children[0]; + ArrowSchemaInit(int64_field); + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(int64_field, NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(int64_field, "id")); + int64_field->flags &= ~ARROW_FLAG_NULLABLE; + + // Set up the nullable string field + struct ArrowSchema* string_field = out_schema.children[1]; + ArrowSchemaInit(string_field); + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(string_field, NANOARROW_TYPE_STRING)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(string_field, "name")); + string_field->flags |= ARROW_FLAG_NULLABLE; + + constexpr int64_t kNumValues = 3; + std::array int64_values = {1, 2, 3}; + std::array string_values = {"a", "b", "c"}; + + ArrowArray out_array; + NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&out_array, &out_schema, nullptr)); + ArrowArray* int64_array = out_array.children[0]; + ArrowArray* string_array = out_array.children[1]; + + NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(int64_array)); + NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(string_array)); + + for (int64_t i = 0; i < kNumValues; i++) { + NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i])); + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str()))); + } + + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(int64_array, nullptr)); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(string_array, nullptr)); + + out_array.length = kNumValues; + out_array.null_count = 0; + + return {out_schema, out_array}; +} + +std::pair CreateExampleArrowSchemaAndArrayBySparrow() { + using struct_type = std::tuple>; + std::vector values = { + {1, "a"}, + {2, "b"}, + {3, "c"}, + }; + auto sparrow_array = sparrow::build(values); + + // Demonstrate the use of arrow_proxy to modify the schema + auto [_, non_owning_schema] = sparrow::get_arrow_structures(sparrow_array); + non_owning_schema->children[0]->name = "id"; + non_owning_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; + non_owning_schema->children[1]->name = "name"; + non_owning_schema->children[1]->flags |= ARROW_FLAG_NULLABLE; + + auto [array, schema] = sparrow::extract_arrow_structures(std::move(sparrow_array)); + return {schema, array}; +} + +} // namespace iceberg::internal diff --git a/src/iceberg/arrow_c_data_internal.h b/src/iceberg/arrow_c_data_internal.h new file mode 100644 index 000000000..8a190b32e --- /dev/null +++ b/src/iceberg/arrow_c_data_internal.h @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +namespace iceberg::internal { + +/** + * @brief Create a simple schema with non-nullable int64 and nullable string fields. + * + * This is the example code to demonstrate the usage of nanoarrow API. + */ +std::pair CreateExampleArrowSchemaAndArrayByNanoarrow(); + +/** + * @brief Create a simple ArrowArray with non-nullable int64 and nullable string fields + * + * This is the example code to demonstrate the usage of sparrow API. + */ +std::pair CreateExampleArrowSchemaAndArrayBySparrow(); + +} // namespace iceberg::internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8c7fdf61..e29a76ec7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -23,4 +23,6 @@ fetchcontent_declare(googletest GTest) fetchcontent_makeavailable(googletest) +add_subdirectory(arrow) +add_subdirectory(avro) add_subdirectory(core) diff --git a/test/arrow/CMakeLists.txt b/test/arrow/CMakeLists.txt new file mode 100644 index 000000000..0ef658643 --- /dev/null +++ b/test/arrow/CMakeLists.txt @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(ICEBERG_ARROW) + add_executable(arrow_unittest) + target_sources(arrow_unittest PRIVATE arrow_test.cc) + target_link_libraries(arrow_unittest PRIVATE iceberg_arrow_static Arrow::arrow_static + GTest::gtest_main) + target_include_directories(arrow_unittest PRIVATE "${ICEBERG_INCLUDES}") + add_test(NAME arrow_unittest COMMAND arrow_unittest) +endif() diff --git a/test/arrow/arrow_test.cc b/test/arrow/arrow_test.cc new file mode 100644 index 000000000..c9ced8c9f --- /dev/null +++ b/test/arrow/arrow_test.cc @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include + +#include "iceberg/arrow_c_data_internal.h" + +namespace iceberg { + +namespace { + +void CheckArrowSchemaAndArray(ArrowSchema* schema, ArrowArray* array) { + auto arrow_schema = ::arrow::ImportSchema(schema).ValueOrDie(); + EXPECT_EQ(arrow_schema->num_fields(), 2); + + auto id_field = arrow_schema->field(0); + EXPECT_EQ(id_field->name(), "id"); + EXPECT_EQ(id_field->type()->id(), ::arrow::Type::INT64); + EXPECT_FALSE(id_field->nullable()); + + auto name_field = arrow_schema->field(1); + EXPECT_EQ(name_field->name(), "name"); + EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING); + EXPECT_TRUE(name_field->nullable()); + + auto arrow_record_batch = ::arrow::ImportRecordBatch(array, arrow_schema).ValueOrDie(); + EXPECT_EQ(arrow_record_batch->num_rows(), 3); + EXPECT_EQ(arrow_record_batch->num_columns(), 2); + + auto id_column = arrow_record_batch->column(0); + EXPECT_EQ(id_column->type()->id(), ::arrow::Type::INT64); + EXPECT_EQ(id_column->GetScalar(0).ValueOrDie()->ToString(), "1"); + EXPECT_EQ(id_column->GetScalar(1).ValueOrDie()->ToString(), "2"); + EXPECT_EQ(id_column->GetScalar(2).ValueOrDie()->ToString(), "3"); + + auto name_column = arrow_record_batch->column(1); + EXPECT_EQ(name_column->type()->id(), ::arrow::Type::STRING); + EXPECT_EQ(name_column->GetScalar(0).ValueOrDie()->ToString(), "a"); + EXPECT_EQ(name_column->GetScalar(1).ValueOrDie()->ToString(), "b"); + EXPECT_EQ(name_column->GetScalar(2).ValueOrDie()->ToString(), "c"); +} + +} // namespace + +TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) { + auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow(); + CheckArrowSchemaAndArray(&schema, &array); +} + +TEST(ArrowCDataTest, CheckArrowSchemaAndArrayBySparrow) { + auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayBySparrow(); + CheckArrowSchemaAndArray(&schema, &array); +} + +} // namespace iceberg diff --git a/test/avro/CMakeLists.txt b/test/avro/CMakeLists.txt new file mode 100644 index 000000000..9cd1c0b8a --- /dev/null +++ b/test/avro/CMakeLists.txt @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(ICEBERG_AVRO) + add_executable(avro_unittest) + target_sources(avro_unittest PRIVATE avro_test.cc) + target_link_libraries(avro_unittest PRIVATE iceberg_avro_static GTest::gtest_main) + target_include_directories(avro_unittest PRIVATE "${ICEBERG_INCLUDES}") + add_test(NAME avro_unittest COMMAND avro_unittest) +endif() diff --git a/test/core/avro_unittest.cc b/test/avro/avro_test.cc similarity index 100% rename from test/core/avro_unittest.cc rename to test/avro/avro_test.cc diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt index 23b084444..6e82b9b40 100644 --- a/test/core/CMakeLists.txt +++ b/test/core/CMakeLists.txt @@ -22,14 +22,6 @@ target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main GTe target_include_directories(core_unittest PRIVATE "${ICEBERG_INCLUDES}") add_test(NAME core_unittest COMMAND core_unittest) -if(ICEBERG_AVRO) - add_executable(avro_unittest) - target_sources(avro_unittest PRIVATE avro_unittest.cc) - target_link_libraries(avro_unittest PRIVATE iceberg_avro_static GTest::gtest_main) - target_include_directories(avro_unittest PRIVATE "${ICEBERG_INCLUDES}") - add_test(NAME avro_unittest COMMAND avro_unittest) -endif() - add_executable(expected_test) target_sources(expected_test PRIVATE expected_test.cc) target_link_libraries(expected_test PRIVATE iceberg_static GTest::gtest_main) From 1484064829cce32819c8d7b5b58c07cfeed65885 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Wed, 19 Mar 2025 18:05:35 +0800 Subject: [PATCH 07/11] add -fPIC to nanoarrow --- cmake_modules/IcebergThirdpartyToolchain.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 80ac38ac1..38460ea19 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -221,6 +221,7 @@ function(resolve_nanoarrow_dependency) fetchcontent_makeavailable(nanoarrow) set_target_properties(nanoarrow PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow") + set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) install(TARGETS nanoarrow EXPORT iceberg_targets RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}" From f15ac1d72a0104d954c61efd4d9a6763d362c04e Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Wed, 19 Mar 2025 22:18:03 +0800 Subject: [PATCH 08/11] add missing include --- src/iceberg/arrow_c_data_internal.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc index 3e5b77223..56082f9e0 100644 --- a/src/iceberg/arrow_c_data_internal.cc +++ b/src/iceberg/arrow_c_data_internal.cc @@ -19,6 +19,11 @@ #include "iceberg/arrow_c_data_internal.h" +#include +#include +#include +#include + #include namespace iceberg::internal { From 4b115718539f2d6dd894b34ff6f277c894e31ca6 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Wed, 19 Mar 2025 23:12:37 +0800 Subject: [PATCH 09/11] temporarily disable sparrow to fix windows ci --- .../IcebergThirdpartyToolchain.cmake | 2 +- src/iceberg/CMakeLists.txt | 12 ++++-- src/iceberg/arrow_c_data_internal.cc | 40 +++++++++---------- src/iceberg/arrow_c_data_internal.h | 14 +++---- test/arrow/arrow_test.cc | 8 ++-- 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 38460ea19..ed1340b56 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -273,4 +273,4 @@ function(resolve_sparrow_dependency) PARENT_SCOPE) endfunction() -resolve_sparrow_dependency() +# resolve_sparrow_dependency() diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 069244566..a8670bb60 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -27,10 +27,14 @@ set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS) set(ICEBERG_STATIC_INSTALL_INTERFACE_LIBS) set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS) -list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) -list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) -list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") -list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") +# list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) +# list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) +# list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") +# list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") +list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow) +list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow) +list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow") +list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow") add_iceberg_lib(iceberg SOURCES diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc index 56082f9e0..2f6f065c9 100644 --- a/src/iceberg/arrow_c_data_internal.cc +++ b/src/iceberg/arrow_c_data_internal.cc @@ -24,7 +24,7 @@ #include #include -#include +// #include namespace iceberg::internal { @@ -76,24 +76,24 @@ std::pair CreateExampleArrowSchemaAndArrayByNanoarrow() return {out_schema, out_array}; } -std::pair CreateExampleArrowSchemaAndArrayBySparrow() { - using struct_type = std::tuple>; - std::vector values = { - {1, "a"}, - {2, "b"}, - {3, "c"}, - }; - auto sparrow_array = sparrow::build(values); - - // Demonstrate the use of arrow_proxy to modify the schema - auto [_, non_owning_schema] = sparrow::get_arrow_structures(sparrow_array); - non_owning_schema->children[0]->name = "id"; - non_owning_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; - non_owning_schema->children[1]->name = "name"; - non_owning_schema->children[1]->flags |= ARROW_FLAG_NULLABLE; - - auto [array, schema] = sparrow::extract_arrow_structures(std::move(sparrow_array)); - return {schema, array}; -} +// std::pair CreateExampleArrowSchemaAndArrayBySparrow() { +// using struct_type = std::tuple>; +// std::vector values = { +// {1, "a"}, +// {2, "b"}, +// {3, "c"}, +// }; +// auto sparrow_array = sparrow::build(values); + +// // Demonstrate the use of arrow_proxy to modify the schema +// auto [_, non_owning_schema] = sparrow::get_arrow_structures(sparrow_array); +// non_owning_schema->children[0]->name = "id"; +// non_owning_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; +// non_owning_schema->children[1]->name = "name"; +// non_owning_schema->children[1]->flags |= ARROW_FLAG_NULLABLE; + +// auto [array, schema] = sparrow::extract_arrow_structures(std::move(sparrow_array)); +// return {schema, array}; +// } } // namespace iceberg::internal diff --git a/src/iceberg/arrow_c_data_internal.h b/src/iceberg/arrow_c_data_internal.h index 8a190b32e..3e86b5c59 100644 --- a/src/iceberg/arrow_c_data_internal.h +++ b/src/iceberg/arrow_c_data_internal.h @@ -20,7 +20,7 @@ #pragma once #include -#include +// #include namespace iceberg::internal { @@ -31,11 +31,11 @@ namespace iceberg::internal { */ std::pair CreateExampleArrowSchemaAndArrayByNanoarrow(); -/** - * @brief Create a simple ArrowArray with non-nullable int64 and nullable string fields - * - * This is the example code to demonstrate the usage of sparrow API. - */ -std::pair CreateExampleArrowSchemaAndArrayBySparrow(); +// /** +// * @brief Create a simple ArrowArray with non-nullable int64 and nullable string fields +// * +// * This is the example code to demonstrate the usage of sparrow API. +// */ +// std::pair CreateExampleArrowSchemaAndArrayBySparrow(); } // namespace iceberg::internal diff --git a/test/arrow/arrow_test.cc b/test/arrow/arrow_test.cc index c9ced8c9f..19a5cdf79 100644 --- a/test/arrow/arrow_test.cc +++ b/test/arrow/arrow_test.cc @@ -66,9 +66,9 @@ TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) { CheckArrowSchemaAndArray(&schema, &array); } -TEST(ArrowCDataTest, CheckArrowSchemaAndArrayBySparrow) { - auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayBySparrow(); - CheckArrowSchemaAndArray(&schema, &array); -} +// TEST(ArrowCDataTest, CheckArrowSchemaAndArrayBySparrow) { +// auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayBySparrow(); +// CheckArrowSchemaAndArray(&schema, &array); +// } } // namespace iceberg From 49143c6cde2ce1358525a87399a0dfc88a0f9822 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 20 Mar 2025 09:56:13 +0800 Subject: [PATCH 10/11] remove sparrow --- .../IcebergThirdpartyToolchain.cmake | 52 +------------------ src/iceberg/CMakeLists.txt | 4 -- src/iceberg/arrow_c_data_internal.cc | 25 --------- src/iceberg/arrow_c_data_internal.h | 8 --- test/arrow/arrow_test.cc | 20 ++----- 5 files changed, 6 insertions(+), 103 deletions(-) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index ed1340b56..53b20c188 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -210,18 +210,14 @@ endif() function(resolve_nanoarrow_dependency) prepare_fetchcontent() - set(NANOARROW_NAMESPACE - "Iceberg" - CACHE STRING "" FORCE) - fetchcontent_declare(nanoarrow ${FC_DECLARE_COMMON_OPTIONS} URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz" ) fetchcontent_makeavailable(nanoarrow) - set_target_properties(nanoarrow PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow") - set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(nanoarrow PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow" + POSITION_INDEPENDENT_CODE ON) install(TARGETS nanoarrow EXPORT iceberg_targets RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}" @@ -230,47 +226,3 @@ function(resolve_nanoarrow_dependency) endfunction() resolve_nanoarrow_dependency() - -# ---------------------------------------------------------------------- -# Sparrow - -function(resolve_sparrow_dependency) - prepare_fetchcontent() - - # Add the sparrow cmake module path to the CMAKE_MODULE_PATH - # Otherwise we will see error below: - # include could not find requested file: sanitizers - list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/sparrow-src/cmake) - - if(MSVC_TOOLCHAIN) - # MSVC does not support int128_t - set(USE_LARGE_INT_PLACEHOLDERS ON) - endif() - - fetchcontent_declare(sparrow - ${FC_DECLARE_COMMON_OPTIONS} - GIT_REPOSITORY https://github.com/man-group/sparrow.git - GIT_TAG f2fdcadc07538d558f128e887257e2ac19610adf # 0.5.0 - ) - fetchcontent_makeavailable(sparrow) - - set_target_properties(sparrow PROPERTIES OUTPUT_NAME "iceberg_vendored_sparrow") - target_compile_definitions(sparrow INTERFACE SPARROW_USE_DATE_POLYFILL) - if(MSVC_TOOLCHAIN) - target_compile_definitions(sparrow INTERFACE SPARROW_USE_LARGE_INT_PLACEHOLDERS) - endif() - install(TARGETS sparrow - EXPORT iceberg_targets - RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}" - ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}" - LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}") - - # sparrow depends on date::date and date::date-tz. It is tricky to use FetchContent - # to vendor date library since sparrow links date::date and date::date-tz directly. - list(APPEND ICEBERG_SYSTEM_DEPENDENCIES date) - set(ICEBERG_SYSTEM_DEPENDENCIES - ${ICEBERG_SYSTEM_DEPENDENCIES} - PARENT_SCOPE) -endfunction() - -# resolve_sparrow_dependency() diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index a8670bb60..0c1475bbb 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -27,10 +27,6 @@ set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS) set(ICEBERG_STATIC_INSTALL_INTERFACE_LIBS) set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS) -# list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) -# list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow sparrow) -# list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") -# list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow" "Iceberg::sparrow") list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow) list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow) list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow") diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc index 2f6f065c9..c43921dcf 100644 --- a/src/iceberg/arrow_c_data_internal.cc +++ b/src/iceberg/arrow_c_data_internal.cc @@ -19,12 +19,7 @@ #include "iceberg/arrow_c_data_internal.h" -#include #include -#include -#include - -// #include namespace iceberg::internal { @@ -76,24 +71,4 @@ std::pair CreateExampleArrowSchemaAndArrayByNanoarrow() return {out_schema, out_array}; } -// std::pair CreateExampleArrowSchemaAndArrayBySparrow() { -// using struct_type = std::tuple>; -// std::vector values = { -// {1, "a"}, -// {2, "b"}, -// {3, "c"}, -// }; -// auto sparrow_array = sparrow::build(values); - -// // Demonstrate the use of arrow_proxy to modify the schema -// auto [_, non_owning_schema] = sparrow::get_arrow_structures(sparrow_array); -// non_owning_schema->children[0]->name = "id"; -// non_owning_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; -// non_owning_schema->children[1]->name = "name"; -// non_owning_schema->children[1]->flags |= ARROW_FLAG_NULLABLE; - -// auto [array, schema] = sparrow::extract_arrow_structures(std::move(sparrow_array)); -// return {schema, array}; -// } - } // namespace iceberg::internal diff --git a/src/iceberg/arrow_c_data_internal.h b/src/iceberg/arrow_c_data_internal.h index 3e86b5c59..2d913c55e 100644 --- a/src/iceberg/arrow_c_data_internal.h +++ b/src/iceberg/arrow_c_data_internal.h @@ -20,7 +20,6 @@ #pragma once #include -// #include namespace iceberg::internal { @@ -31,11 +30,4 @@ namespace iceberg::internal { */ std::pair CreateExampleArrowSchemaAndArrayByNanoarrow(); -// /** -// * @brief Create a simple ArrowArray with non-nullable int64 and nullable string fields -// * -// * This is the example code to demonstrate the usage of sparrow API. -// */ -// std::pair CreateExampleArrowSchemaAndArrayBySparrow(); - } // namespace iceberg::internal diff --git a/test/arrow/arrow_test.cc b/test/arrow/arrow_test.cc index 19a5cdf79..1d730fc49 100644 --- a/test/arrow/arrow_test.cc +++ b/test/arrow/arrow_test.cc @@ -26,10 +26,10 @@ namespace iceberg { -namespace { +TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) { + auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow(); -void CheckArrowSchemaAndArray(ArrowSchema* schema, ArrowArray* array) { - auto arrow_schema = ::arrow::ImportSchema(schema).ValueOrDie(); + auto arrow_schema = ::arrow::ImportSchema(&schema).ValueOrDie(); EXPECT_EQ(arrow_schema->num_fields(), 2); auto id_field = arrow_schema->field(0); @@ -42,7 +42,7 @@ void CheckArrowSchemaAndArray(ArrowSchema* schema, ArrowArray* array) { EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING); EXPECT_TRUE(name_field->nullable()); - auto arrow_record_batch = ::arrow::ImportRecordBatch(array, arrow_schema).ValueOrDie(); + auto arrow_record_batch = ::arrow::ImportRecordBatch(&array, arrow_schema).ValueOrDie(); EXPECT_EQ(arrow_record_batch->num_rows(), 3); EXPECT_EQ(arrow_record_batch->num_columns(), 2); @@ -59,16 +59,4 @@ void CheckArrowSchemaAndArray(ArrowSchema* schema, ArrowArray* array) { EXPECT_EQ(name_column->GetScalar(2).ValueOrDie()->ToString(), "c"); } -} // namespace - -TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) { - auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow(); - CheckArrowSchemaAndArray(&schema, &array); -} - -// TEST(ArrowCDataTest, CheckArrowSchemaAndArrayBySparrow) { -// auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayBySparrow(); -// CheckArrowSchemaAndArray(&schema, &array); -// } - } // namespace iceberg From d4e1c82c37e1f14d6a47300dbf1d3de5a3c2f3e1 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 20 Mar 2025 10:00:42 +0800 Subject: [PATCH 11/11] remove date library in ci --- .github/workflows/cpp-linter.yml | 5 ----- .github/workflows/test.yml | 12 +----------- src/iceberg/arrow_c_data_internal.cc | 2 ++ 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index ed7b1bbc3..b31e62592 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -34,11 +34,6 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - - name: Install Date Library - shell: bash - run: | - sudo apt-get update - sudo apt-get install -y libdate-tz3 libhowardhinnant-date-dev - name: Run build run: | mkdir build && cd build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bed489440..43aebad1b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,11 +48,6 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - - name: Install Date Library - shell: bash - run: | - sudo apt-get update - sudo apt-get install -y libdate-tz3 libhowardhinnant-date-dev - name: Build Iceberg shell: bash run: ci/scripts/build_iceberg.sh $(pwd) @@ -70,10 +65,6 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - - name: Install Date Library - shell: bash - run: | - brew install howard-hinnant-date - name: Build Iceberg shell: bash run: ci/scripts/build_iceberg.sh $(pwd) @@ -89,11 +80,10 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - - name: Install ZLIB and Date Library + - name: Install ZLIB shell: cmd run: | vcpkg install zlib:x64-windows - vcpkg install date:x64-windows - name: Build Iceberg shell: cmd run: | diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc index c43921dcf..9716b25a4 100644 --- a/src/iceberg/arrow_c_data_internal.cc +++ b/src/iceberg/arrow_c_data_internal.cc @@ -19,7 +19,9 @@ #include "iceberg/arrow_c_data_internal.h" +#include #include +#include namespace iceberg::internal {