Skip to content

Commit 9fd3d53

Browse files
authored
Add Arrow C Data Interface and nanoarrow (#44)
Closes #33
1 parent cd82335 commit 9fd3d53

File tree

13 files changed

+352
-13
lines changed

13 files changed

+352
-13
lines changed

.github/workflows/cpp-linter.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ jobs:
3030
cpp-linter:
3131
runs-on: ubuntu-24.04
3232
steps:
33-
- uses: actions/checkout@v4
33+
- name: Checkout iceberg-cpp
34+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
35+
with:
36+
fetch-depth: 0
3437
- name: Run build
3538
run: |
3639
mkdir build && cd build

.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ jobs:
5555
shell: bash
5656
run: ci/scripts/build_example.sh $(pwd)/example
5757
macos:
58-
name: AArch64 macOS 14
59-
runs-on: macos-14
58+
name: AArch64 macOS 15
59+
runs-on: macos-15
6060
timeout-minutes: 30
6161
strategy:
6262
fail-fast: false

cmake_modules/IcebergThirdpartyToolchain.cmake

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ function(resolve_arrow_dependency)
6565
set(ARROW_BUILD_STATIC
6666
ON
6767
CACHE BOOL "" FORCE)
68+
# Work around https://github.com/apache/arrow/pull/45513 by forcing ARROW_IPC on
69+
set(ARROW_IPC
70+
ON
71+
CACHE BOOL "" FORCE)
6872
set(ARROW_FILESYSTEM
6973
OFF
7074
CACHE BOOL "" FORCE)
@@ -198,3 +202,27 @@ endfunction()
198202
if(ICEBERG_AVRO)
199203
resolve_avro_dependency()
200204
endif()
205+
206+
# ----------------------------------------------------------------------
207+
# Nanoarrow
208+
209+
# It is also possible to vendor nanoarrow using the bundled source code.
210+
# Fetches the nanoarrow 0.6.0 source tarball with FetchContent, builds it as
# position-independent code under the library name "iceberg_vendored_nanoarrow",
# and installs the target as part of the iceberg_targets export set.
function(resolve_nanoarrow_dependency)
  prepare_fetchcontent()

  # The URLs are tried in order: the Apache CDN first, then the Apache archive
  # as a fallback so that configuration does not fail outright when the tarball
  # cannot be fetched from the CDN.
  # TODO: pin the tarball with URL_HASH once the published checksum has been
  # verified.
  fetchcontent_declare(nanoarrow
                       ${FC_DECLARE_COMMON_OPTIONS}
                       URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz"
                           "https://archive.apache.org/dist/arrow/apache-arrow-nanoarrow-0.6.0/apache-arrow-nanoarrow-0.6.0.tar.gz"
  )
  fetchcontent_makeavailable(nanoarrow)

  # Rename the produced library so it is recognizable as the copy vendored by
  # this project.
  set_target_properties(nanoarrow PROPERTIES OUTPUT_NAME "iceberg_vendored_nanoarrow"
                                             POSITION_INDEPENDENT_CODE ON)
  install(TARGETS nanoarrow
          EXPORT iceberg_targets
          RUNTIME DESTINATION "${ICEBERG_INSTALL_BINDIR}"
          ARCHIVE DESTINATION "${ICEBERG_INSTALL_LIBDIR}"
          LIBRARY DESTINATION "${ICEBERG_INSTALL_LIBDIR}")
endfunction()
227+
228+
resolve_nanoarrow_dependency()

src/iceberg/CMakeLists.txt

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,36 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc)
18+
set(ICEBERG_SOURCES
19+
arrow_c_data_internal.cc
20+
demo_table.cc
21+
schema.cc
22+
schema_field.cc
23+
type.cc)
24+
25+
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
26+
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)
27+
set(ICEBERG_STATIC_INSTALL_INTERFACE_LIBS)
28+
set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS)
29+
30+
list(APPEND ICEBERG_STATIC_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow)
31+
list(APPEND ICEBERG_SHARED_BUILD_INTERFACE_LIBS nanoarrow::nanoarrow)
32+
list(APPEND ICEBERG_STATIC_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow")
33+
list(APPEND ICEBERG_SHARED_INSTALL_INTERFACE_LIBS "Iceberg::nanoarrow")
1934

2035
add_iceberg_lib(iceberg
2136
SOURCES
2237
${ICEBERG_SOURCES}
2338
PRIVATE_INCLUDES
24-
${ICEBERG_INCLUDES})
39+
${ICEBERG_INCLUDES}
40+
SHARED_LINK_LIBS
41+
${ICEBERG_SHARED_BUILD_INTERFACE_LIBS}
42+
STATIC_LINK_LIBS
43+
${ICEBERG_STATIC_BUILD_INTERFACE_LIBS}
44+
STATIC_INSTALL_INTERFACE_LIBS
45+
${ICEBERG_STATIC_INSTALL_INTERFACE_LIBS}
46+
SHARED_INSTALL_INTERFACE_LIBS
47+
${ICEBERG_SHARED_INSTALL_INTERFACE_LIBS})
2548

2649
iceberg_install_all_headers(iceberg)
2750

src/iceberg/arrow_c_data.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/arrow_c_data.h
23+
/// Arrow C data interface
24+
///
25+
/// The Arrow C Data interface (https://arrow.apache.org/docs/format/CDataInterface.html)
26+
/// is part of the Arrow Columnar Format specification
27+
/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
28+
/// documentation of these structures.
29+
30+
#include <cstdint>
31+
32+
#ifndef ARROW_C_DATA_INTERFACE
33+
# define ARROW_C_DATA_INTERFACE
34+
35+
extern "C" {
36+
// Schema (type description) structure of the Arrow C Data Interface.
//
// This definition is only compiled when ARROW_C_DATA_INTERFACE has not already
// been defined (see the surrounding #ifndef).
// NOTE(review): the member order and types are presumably fixed by the
// specification linked in this file's header comment -- confirm against the
// spec before reordering, adding, or removing members.
struct ArrowSchema {
37+
// Array type description
38+
const char* format;
39+
const char* name;
40+
const char* metadata;
41+
// Bit flags (nanoarrow-based code in this project toggles ARROW_FLAG_NULLABLE
// on this member).
int64_t flags;
42+
// Child schemas; presumably `n_children` entries -- see the spec.
int64_t n_children;
43+
struct ArrowSchema** children;
44+
// Dictionary schema; see the spec for when this is set.
struct ArrowSchema* dictionary;
45+
46+
// Release callback; it receives a pointer to the struct being released.
47+
void (*release)(struct ArrowSchema*);
48+
// Opaque producer-specific data
49+
void* private_data;
50+
};
51+
52+
// Array (data) structure of the Arrow C Data Interface.
//
// This definition is only compiled when ARROW_C_DATA_INTERFACE has not already
// been defined (see the surrounding #ifndef).
// NOTE(review): the member order and types are presumably fixed by the
// specification linked in this file's header comment -- confirm against the
// spec before reordering, adding, or removing members.
struct ArrowArray {
53+
// Array data description
54+
int64_t length;
55+
int64_t null_count;
56+
int64_t offset;
57+
// Buffer pointers; presumably `n_buffers` entries -- see the spec.
int64_t n_buffers;
58+
// Child arrays; presumably `n_children` entries -- see the spec.
int64_t n_children;
59+
const void** buffers;
60+
struct ArrowArray** children;
61+
// Dictionary array; see the spec for when this is set.
struct ArrowArray* dictionary;
62+
63+
// Release callback; it receives a pointer to the struct being released.
64+
void (*release)(struct ArrowArray*);
65+
// Opaque producer-specific data
66+
void* private_data;
67+
};
68+
69+
} // extern "C"
70+
71+
#endif // ARROW_C_DATA_INTERFACE
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/arrow_c_data_internal.h"
21+
22+
#include <array>
23+
#include <string>
24+
#include <utility>
25+
26+
namespace iceberg::internal {
27+
28+
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow() {
29+
ArrowSchema out_schema;
30+
31+
// Initializes the root struct schema
32+
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(&out_schema, NANOARROW_TYPE_STRUCT));
33+
NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(&out_schema, 2));
34+
35+
// Set up the non-nullable int64 field
36+
struct ArrowSchema* int64_field = out_schema.children[0];
37+
ArrowSchemaInit(int64_field);
38+
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(int64_field, NANOARROW_TYPE_INT64));
39+
NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(int64_field, "id"));
40+
int64_field->flags &= ~ARROW_FLAG_NULLABLE;
41+
42+
// Set up the nullable string field
43+
struct ArrowSchema* string_field = out_schema.children[1];
44+
ArrowSchemaInit(string_field);
45+
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(string_field, NANOARROW_TYPE_STRING));
46+
NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(string_field, "name"));
47+
string_field->flags |= ARROW_FLAG_NULLABLE;
48+
49+
constexpr int64_t kNumValues = 3;
50+
std::array<int64_t, kNumValues> int64_values = {1, 2, 3};
51+
std::array<std::string, kNumValues> string_values = {"a", "b", "c"};
52+
53+
ArrowArray out_array;
54+
NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&out_array, &out_schema, nullptr));
55+
ArrowArray* int64_array = out_array.children[0];
56+
ArrowArray* string_array = out_array.children[1];
57+
58+
NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(int64_array));
59+
NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(string_array));
60+
61+
for (int64_t i = 0; i < kNumValues; i++) {
62+
NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i]));
63+
NANOARROW_THROW_NOT_OK(
64+
ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str())));
65+
}
66+
67+
NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(int64_array, nullptr));
68+
NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(string_array, nullptr));
69+
70+
out_array.length = kNumValues;
71+
out_array.null_count = 0;
72+
73+
return {out_schema, out_array};
74+
}
75+
76+
} // namespace iceberg::internal
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <utility>

#include <nanoarrow/nanoarrow.hpp>
23+
24+
namespace iceberg::internal {
25+
26+
/**
27+
* @brief Create a simple schema with non-nullable int64 and nullable string fields.
28+
*
29+
* This is the example code to demonstrate the usage of nanoarrow API.
30+
*/
31+
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow();
32+
33+
} // namespace iceberg::internal

test/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,6 @@ fetchcontent_declare(googletest
2323
GTest)
2424
fetchcontent_makeavailable(googletest)
2525

26+
add_subdirectory(arrow)
27+
add_subdirectory(avro)
2628
add_subdirectory(core)

test/arrow/CMakeLists.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Unit test executable for the Arrow integration; it is only defined when
# ICEBERG_ARROW is enabled.
if(ICEBERG_ARROW)
  add_executable(arrow_unittest arrow_test.cc)
  target_include_directories(arrow_unittest PRIVATE "${ICEBERG_INCLUDES}")
  target_link_libraries(arrow_unittest
                        PRIVATE iceberg_arrow_static
                                Arrow::arrow_static
                                GTest::gtest_main)
  add_test(NAME arrow_unittest COMMAND arrow_unittest)
endif()

test/arrow/arrow_test.cc

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <arrow/api.h>
21+
#include <arrow/c/bridge.h>
22+
#include <arrow/result.h>
23+
#include <gtest/gtest.h>
24+
25+
#include "iceberg/arrow_c_data_internal.h"
26+
27+
namespace iceberg {
28+
29+
// Imports the schema and array produced by the nanoarrow example into Arrow
// C++ and checks the field definitions and the three rows of data.
TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) {
  auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow();

  // Check the import results explicitly instead of calling ValueOrDie(), which
  // would abort the whole test binary rather than fail this test.
  auto schema_result = ::arrow::ImportSchema(&schema);
  if (!schema_result.ok()) {
    // The array has not been handed to a consumer yet, so release it here
    // before bailing out.
    if (array.release != nullptr) {
      array.release(&array);
    }
    FAIL() << schema_result.status().ToString();
  }
  auto arrow_schema = schema_result.ValueUnsafe();

  // Import the array before any further fatal assertion so that it is never
  // left unreleased by an early return.
  auto batch_result = ::arrow::ImportRecordBatch(&array, arrow_schema);
  ASSERT_TRUE(batch_result.ok()) << batch_result.status().ToString();
  auto arrow_record_batch = batch_result.ValueUnsafe();

  // Fatal assertion: field(0) and field(1) below require two fields.
  ASSERT_EQ(arrow_schema->num_fields(), 2);

  auto id_field = arrow_schema->field(0);
  EXPECT_EQ(id_field->name(), "id");
  EXPECT_EQ(id_field->type()->id(), ::arrow::Type::INT64);
  EXPECT_FALSE(id_field->nullable());

  auto name_field = arrow_schema->field(1);
  EXPECT_EQ(name_field->name(), "name");
  EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING);
  EXPECT_TRUE(name_field->nullable());

  // Fatal assertions: the column and row accesses below depend on these sizes.
  ASSERT_EQ(arrow_record_batch->num_rows(), 3);
  ASSERT_EQ(arrow_record_batch->num_columns(), 2);

  auto id_column = arrow_record_batch->column(0);
  EXPECT_EQ(id_column->type()->id(), ::arrow::Type::INT64);
  EXPECT_EQ(id_column->GetScalar(0).ValueOrDie()->ToString(), "1");
  EXPECT_EQ(id_column->GetScalar(1).ValueOrDie()->ToString(), "2");
  EXPECT_EQ(id_column->GetScalar(2).ValueOrDie()->ToString(), "3");

  auto name_column = arrow_record_batch->column(1);
  EXPECT_EQ(name_column->type()->id(), ::arrow::Type::STRING);
  EXPECT_EQ(name_column->GetScalar(0).ValueOrDie()->ToString(), "a");
  EXPECT_EQ(name_column->GetScalar(1).ValueOrDie()->ToString(), "b");
  EXPECT_EQ(name_column->GetScalar(2).ValueOrDie()->ToString(), "c");
}
61+
62+
} // namespace iceberg

0 commit comments

Comments
 (0)