Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/cpp-linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ jobs:
cpp-linter:
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- name: Checkout iceberg-cpp
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Run build
run: |
mkdir build && cd build
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ jobs:
shell: bash
run: ci/scripts/build_example.sh $(pwd)/example
macos:
name: AArch64 macOS 14
runs-on: macos-14
name: AArch64 macOS 15
runs-on: macos-15
timeout-minutes: 30
strategy:
fail-fast: false
Expand Down
28 changes: 28 additions & 0 deletions cmake_modules/IcebergThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ function(resolve_arrow_dependency)
set(ARROW_BUILD_STATIC
ON
CACHE BOOL "" FORCE)
# To work around https://github.com/apache/arrow/pull/45513
set(ARROW_IPC
ON
CACHE BOOL "" FORCE)
set(ARROW_FILESYSTEM
OFF
CACHE BOOL "" FORCE)
Expand Down Expand Up @@ -198,3 +202,27 @@ endfunction()
# Resolve the Avro dependency only when ICEBERG_AVRO evaluates to true.
if(ICEBERG_AVRO)
resolve_avro_dependency()
endif()

# ----------------------------------------------------------------------
# Nanoarrow

# Fetch nanoarrow with FetchContent, build it as part of this project and
# install it together with the iceberg targets.
#
# Takes no arguments. After the call the `nanoarrow` target exists, is built as
# position-independent code, produces a library file named
# "iceberg_vendored_nanoarrow" and is part of the `iceberg_targets` export set.
#
# It is also possible to vendor nanoarrow using the bundled source code.
function(resolve_nanoarrow_dependency)
  prepare_fetchcontent()

  # The URLs are tried in order. dlcdn.apache.org only serves current releases,
  # so the permanent archive.apache.org location is listed as a fallback to keep
  # the pinned version downloadable after a newer nanoarrow release is published.
  set(nanoarrow_tarball
      "apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz")
  fetchcontent_declare(nanoarrow
                       ${FC_DECLARE_COMMON_OPTIONS}
                       URL "https://dlcdn.apache.org/arrow/${nanoarrow_tarball}"
                           "https://archive.apache.org/dist/arrow/${nanoarrow_tarball}"
  )
  fetchcontent_makeavailable(nanoarrow)

  # NOTE(review): the distinct OUTPUT_NAME presumably avoids clashing with a
  # separately installed nanoarrow library -- confirm the intent.
  set_target_properties(nanoarrow
                        PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow"
                                   POSITION_INDEPENDENT_CODE ON)
  install(TARGETS nanoarrow
          EXPORT iceberg_targets
          RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}"
          ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}"
          LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}")
endfunction()

resolve_nanoarrow_dependency()
27 changes: 25 additions & 2 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,36 @@
# specific language governing permissions and limitations
# under the License.

set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc)
# Source files handed to add_iceberg_lib() for the core `iceberg` library.
set(ICEBERG_SOURCES
arrow_c_data_internal.cc
demo_table.cc
schema.cc
schema_field.cc
type.cc)

# Link dependencies handed to add_iceberg_lib() below. nanoarrow is referenced
# as nanoarrow::nanoarrow inside the build tree and as Iceberg::nanoarrow in the
# install interface, for both the static and the shared library.
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow)
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow)
set(ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow")
set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow")

add_iceberg_lib(iceberg
SOURCES
${ICEBERG_SOURCES}
PRIVATE_INCLUDES
${ICEBERG_INCLUDES})
${ICEBERG_INCLUDES}
SHARED_LINK_LIBS
${ICEBERG_SHARED_BUILD_INTERFACE_LIBS}
STATIC_LINK_LIBS
${ICEBERG_STATIC_BUILD_INTERFACE_LIBS}
STATIC_INSTALL_INTERFACE_LIBS
${ICEBERG_STATIC_INSTALL_INTERFACE_LIBS}
SHARED_INSTALL_INTERFACE_LIBS
${ICEBERG_SHARED_INSTALL_INTERFACE_LIBS})

iceberg_install_all_headers(iceberg)

Expand Down
71 changes: 71 additions & 0 deletions src/iceberg/arrow_c_data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/arrow_c_data.h
/// Arrow C data interface
///
/// The Arrow C Data interface (https://arrow.apache.org/docs/format/CDataInterface.html)
/// is part of the Arrow Columnar Format specification
/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
/// a full description of these structures.

#include <cstdint>

#ifndef ARROW_C_DATA_INTERFACE
#  define ARROW_C_DATA_INTERFACE

// Bit flags stored in ArrowSchema::flags. The Arrow specification defines them
// under the same ARROW_C_DATA_INTERFACE guard as the structures; they must be
// provided here because any other copy of the ABI header that is included
// afterwards is skipped by the guard and would not define them.
#  define ARROW_FLAG_DICTIONARY_ORDERED 1
#  define ARROW_FLAG_NULLABLE 2
#  define ARROW_FLAG_MAP_KEYS_SORTED 4

extern "C" {
/// Describes the type (and optional name/metadata) of an Arrow array.
/// The layout is fixed by the Arrow C data interface and must not be changed.
struct ArrowSchema {
  // Array type description
  // Format string encoding the data type
  const char* format;
  // Optional field name
  const char* name;
  // Optional binary-encoded key/value metadata
  const char* metadata;
  // Bitwise OR of ARROW_FLAG_* values
  int64_t flags;
  // Number of entries in `children`
  int64_t n_children;
  struct ArrowSchema** children;
  // Schema of the dictionary values for dictionary-encoded types
  struct ArrowSchema* dictionary;

  // Release callback
  void (*release)(struct ArrowSchema*);
  // Opaque producer-specific data
  void* private_data;
};

/// Carries the buffers and child arrays of an Arrow array.
/// The layout is fixed by the Arrow C data interface and must not be changed.
struct ArrowArray {
  // Array data description
  // Logical number of elements
  int64_t length;
  // Number of null elements
  int64_t null_count;
  // Logical offset into the buffers
  int64_t offset;
  // Number of entries in `buffers`
  int64_t n_buffers;
  // Number of entries in `children`
  int64_t n_children;
  const void** buffers;
  struct ArrowArray** children;
  // Dictionary values for dictionary-encoded arrays
  struct ArrowArray* dictionary;

  // Release callback
  void (*release)(struct ArrowArray*);
  // Opaque producer-specific data
  void* private_data;
};

}  // extern "C"

#endif  // ARROW_C_DATA_INTERFACE
76 changes: 76 additions & 0 deletions src/iceberg/arrow_c_data_internal.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/arrow_c_data_internal.h"

#include <array>
#include <string>
#include <utility>

namespace iceberg::internal {

std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow() {
ArrowSchema out_schema;

// Initializes the root struct schema
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(&out_schema, NANOARROW_TYPE_STRUCT));
NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(&out_schema, 2));

// Set up the non-nullable int64 field
struct ArrowSchema* int64_field = out_schema.children[0];
ArrowSchemaInit(int64_field);
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(int64_field, NANOARROW_TYPE_INT64));
NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(int64_field, "id"));
int64_field->flags &= ~ARROW_FLAG_NULLABLE;

// Set up the nullable string field
struct ArrowSchema* string_field = out_schema.children[1];
ArrowSchemaInit(string_field);
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(string_field, NANOARROW_TYPE_STRING));
NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(string_field, "name"));
string_field->flags |= ARROW_FLAG_NULLABLE;

constexpr int64_t kNumValues = 3;
std::array<int64_t, kNumValues> int64_values = {1, 2, 3};
std::array<std::string, kNumValues> string_values = {"a", "b", "c"};

ArrowArray out_array;
NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&out_array, &out_schema, nullptr));
ArrowArray* int64_array = out_array.children[0];
ArrowArray* string_array = out_array.children[1];

NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(int64_array));
NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(string_array));

for (int64_t i = 0; i < kNumValues; i++) {
NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i]));
NANOARROW_THROW_NOT_OK(
ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str())));
}

NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(int64_array, nullptr));
NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(string_array, nullptr));

out_array.length = kNumValues;
out_array.null_count = 0;

return {out_schema, out_array};
}

} // namespace iceberg::internal
33 changes: 33 additions & 0 deletions src/iceberg/arrow_c_data_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <utility>

#include <nanoarrow/nanoarrow.hpp>

namespace iceberg::internal {

/**
 * @brief Create an example Arrow schema and a matching array using nanoarrow.
 *
 * The schema is a struct with a non-nullable int64 field "id" and a nullable
 * string field "name"; the array holds three rows for that schema.
 *
 * This is the example code to demonstrate the usage of nanoarrow API.
 *
 * @return The schema and the array as Arrow C data interface structures. The
 * caller owns both and must make sure their release callbacks are invoked,
 * either directly or by passing them to a consumer that does.
 *
 * Failures of the underlying nanoarrow calls are reported by throwing (via
 * NANOARROW_THROW_NOT_OK).
 */
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow();

}  // namespace iceberg::internal
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ fetchcontent_declare(googletest
GTest)
fetchcontent_makeavailable(googletest)

add_subdirectory(arrow)
add_subdirectory(avro)
add_subdirectory(core)
25 changes: 25 additions & 0 deletions test/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Unit test for the Arrow C data helpers; only built when ICEBERG_ARROW is on.
if(ICEBERG_ARROW)
  add_executable(arrow_unittest arrow_test.cc)
  target_include_directories(arrow_unittest PRIVATE "${ICEBERG_INCLUDES}")
  target_link_libraries(arrow_unittest
                        PRIVATE iceberg_arrow_static
                                Arrow::arrow_static
                                GTest::gtest_main)
  add_test(NAME arrow_unittest COMMAND arrow_unittest)
endif()
62 changes: 62 additions & 0 deletions test/arrow/arrow_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <arrow/api.h>
#include <arrow/c/bridge.h>
#include <arrow/result.h>
#include <gtest/gtest.h>

#include "iceberg/arrow_c_data_internal.h"

namespace iceberg {

// Round-trips the nanoarrow-built example through Arrow C++ and checks that the
// imported schema and record batch carry the expected fields and values.
TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) {
  auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow();

  // Import both C structures before any assertion that may return early, so
  // that neither of them is left unreleased on a failing run.
  auto arrow_schema = ::arrow::ImportSchema(&schema).ValueOrDie();
  auto arrow_record_batch = ::arrow::ImportRecordBatch(&array, arrow_schema).ValueOrDie();

  // ASSERT rather than EXPECT: the indexed field lookups below are only valid
  // when the count is as expected.
  ASSERT_EQ(arrow_schema->num_fields(), 2);

  auto id_field = arrow_schema->field(0);
  EXPECT_EQ(id_field->name(), "id");
  EXPECT_EQ(id_field->type()->id(), ::arrow::Type::INT64);
  EXPECT_FALSE(id_field->nullable());

  auto name_field = arrow_schema->field(1);
  EXPECT_EQ(name_field->name(), "name");
  EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING);
  EXPECT_TRUE(name_field->nullable());

  // Same reasoning: column(i) and GetScalar(i) index without these guarantees.
  ASSERT_EQ(arrow_record_batch->num_rows(), 3);
  ASSERT_EQ(arrow_record_batch->num_columns(), 2);

  auto id_column = arrow_record_batch->column(0);
  EXPECT_EQ(id_column->type()->id(), ::arrow::Type::INT64);
  EXPECT_EQ(id_column->GetScalar(0).ValueOrDie()->ToString(), "1");
  EXPECT_EQ(id_column->GetScalar(1).ValueOrDie()->ToString(), "2");
  EXPECT_EQ(id_column->GetScalar(2).ValueOrDie()->ToString(), "3");

  auto name_column = arrow_record_batch->column(1);
  EXPECT_EQ(name_column->type()->id(), ::arrow::Type::STRING);
  EXPECT_EQ(name_column->GetScalar(0).ValueOrDie()->ToString(), "a");
  EXPECT_EQ(name_column->GetScalar(1).ValueOrDie()->ToString(), "b");
  EXPECT_EQ(name_column->GetScalar(2).ValueOrDie()->ToString(), "c");
}

}  // namespace iceberg
Loading
Loading