Skip to content

Commit 81d4861

Browse files
committed
add example code and test
1 parent 0d3b344 commit 81d4861

File tree

11 files changed

+342
-11
lines changed

11 files changed

+342
-11
lines changed

cmake_modules/IcebergThirdpartyToolchain.cmake

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ function(resolve_arrow_dependency)
6565
set(ARROW_BUILD_STATIC
6666
ON
6767
CACHE BOOL "" FORCE)
68+
# Work around https://github.com/apache/arrow/pull/45513
69+
set(ARROW_IPC
70+
ON
71+
CACHE BOOL "" FORCE)
6872
set(ARROW_FILESYSTEM
6973
OFF
7074
CACHE BOOL "" FORCE)
@@ -207,7 +211,7 @@ function(resolve_nanoarrow_dependency)
207211
prepare_fetchcontent()
208212

209213
set(NANOARROW_NAMESPACE
210-
"iceberg"
214+
"Iceberg"
211215
CACHE STRING "" FORCE)
212216

213217
fetchcontent_declare(nanoarrow
@@ -245,7 +249,7 @@ function(resolve_sparrow_dependency)
245249
fetchcontent_declare(sparrow
246250
${FC_DECLARE_COMMON_OPTIONS}
247251
GIT_REPOSITORY https://github.com/man-group/sparrow.git
248-
GIT_TAG b0794cace22a55c32e90c0236034e040b822b957 # 0.3.0
252+
GIT_TAG f2fdcadc07538d558f128e887257e2ac19610adf # 0.5.0
249253
)
250254
fetchcontent_makeavailable(sparrow)
251255

src/iceberg/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,12 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc)
18+
set(ICEBERG_SOURCES
19+
arrow_c_data_internal.cc
20+
demo_table.cc
21+
schema.cc
22+
schema_field.cc
23+
type.cc)
1924

2025
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
2126
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)

src/iceberg/arrow_c_data.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/arrow_c_data.h
23+
/// Arrow C data interface
24+
///
25+
/// The Arrow C Data interface (https://arrow.apache.org/docs/format/CDataInterface.html)
26+
/// is part of the Arrow Columnar Format specification
27+
/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
28+
/// documentation of these structures.
29+
30+
#include <cstdint>
31+
32+
// The guard macro is shared with other headers that declare the Arrow C data
// interface, so whichever header is included first provides the definitions.
// The flag constants must therefore be defined here together with the
// structs; otherwise a later header that honors the same guard would skip its
// own copy and leave the flags undefined.
#ifndef ARROW_C_DATA_INTERFACE
#  define ARROW_C_DATA_INTERFACE

// Bit values for ArrowSchema::flags.
// Set when the dictionary of a dictionary-encoded type is ordered.
#  define ARROW_FLAG_DICTIONARY_ORDERED 1
// Set when the field is semantically nullable.
#  define ARROW_FLAG_NULLABLE 2
// Set on map types whose keys are sorted within each map value.
#  define ARROW_FLAG_MAP_KEYS_SORTED 4

extern "C" {

/// Type description of an array or field, as laid out by the Arrow C data interface.
struct ArrowSchema {
  // Array type description
  const char* format;
  const char* name;
  const char* metadata;
  // Bitwise OR of the ARROW_FLAG_* values
  int64_t flags;
  int64_t n_children;
  struct ArrowSchema** children;
  struct ArrowSchema* dictionary;

  // Release callback
  void (*release)(struct ArrowSchema*);
  // Opaque producer-specific data
  void* private_data;
};

/// Data of an array, as laid out by the Arrow C data interface.
struct ArrowArray {
  // Array data description
  int64_t length;
  int64_t null_count;
  int64_t offset;
  int64_t n_buffers;
  int64_t n_children;
  const void** buffers;
  struct ArrowArray** children;
  struct ArrowArray* dictionary;

  // Release callback
  void (*release)(struct ArrowArray*);
  // Opaque producer-specific data
  void* private_data;
};

}  // extern "C"

#endif  // ARROW_C_DATA_INTERFACE
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/arrow_c_data_internal.h"
21+
22+
#include <sparrow/builder/builder.hpp>
23+
24+
namespace iceberg::internal {
25+
26+
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow() {
27+
ArrowSchema out_schema;
28+
29+
// Initializes the root struct schema
30+
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(&out_schema, NANOARROW_TYPE_STRUCT));
31+
NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(&out_schema, 2));
32+
33+
// Set up the non-nullable int64 field
34+
struct ArrowSchema* int64_field = out_schema.children[0];
35+
ArrowSchemaInit(int64_field);
36+
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(int64_field, NANOARROW_TYPE_INT64));
37+
NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(int64_field, "id"));
38+
int64_field->flags &= ~ARROW_FLAG_NULLABLE;
39+
40+
// Set up the nullable string field
41+
struct ArrowSchema* string_field = out_schema.children[1];
42+
ArrowSchemaInit(string_field);
43+
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(string_field, NANOARROW_TYPE_STRING));
44+
NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(string_field, "name"));
45+
string_field->flags |= ARROW_FLAG_NULLABLE;
46+
47+
constexpr int64_t kNumValues = 3;
48+
std::array<int64_t, kNumValues> int64_values = {1, 2, 3};
49+
std::array<std::string, kNumValues> string_values = {"a", "b", "c"};
50+
51+
ArrowArray out_array;
52+
NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&out_array, &out_schema, nullptr));
53+
ArrowArray* int64_array = out_array.children[0];
54+
ArrowArray* string_array = out_array.children[1];
55+
56+
NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(int64_array));
57+
NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(string_array));
58+
59+
for (int64_t i = 0; i < kNumValues; i++) {
60+
NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i]));
61+
NANOARROW_THROW_NOT_OK(
62+
ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str())));
63+
}
64+
65+
NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(int64_array, nullptr));
66+
NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(string_array, nullptr));
67+
68+
out_array.length = kNumValues;
69+
out_array.null_count = 0;
70+
71+
return {out_schema, out_array};
72+
}
73+
74+
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayBySparrow() {
75+
using struct_type = std::tuple<int64_t, sparrow::nullable<std::string>>;
76+
std::vector<struct_type> values = {
77+
{1, "a"},
78+
{2, "b"},
79+
{3, "c"},
80+
};
81+
auto sparrow_array = sparrow::build(values);
82+
83+
// Demonstrate the use of arrow_proxy to modify the schema
84+
auto [_, non_owning_schema] = sparrow::get_arrow_structures(sparrow_array);
85+
non_owning_schema->children[0]->name = "id";
86+
non_owning_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE;
87+
non_owning_schema->children[1]->name = "name";
88+
non_owning_schema->children[1]->flags |= ARROW_FLAG_NULLABLE;
89+
90+
auto [array, schema] = sparrow::extract_arrow_structures(std::move(sparrow_array));
91+
return {schema, array};
92+
}
93+
94+
} // namespace iceberg::internal
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <nanoarrow/nanoarrow.hpp>
23+
#include <sparrow/sparrow.hpp>
24+
25+
namespace iceberg::internal {
26+
27+
/**
28+
* @brief Create a simple ArrowSchema and matching ArrowArray with non-nullable int64 and nullable string fields.
29+
*
30+
* This is example code that demonstrates the usage of the nanoarrow API.
31+
*/
32+
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayByNanoarrow();
33+
34+
/**
35+
* @brief Create a simple ArrowSchema and matching ArrowArray with non-nullable int64 and nullable string fields.
36+
*
37+
* This is example code that demonstrates the usage of the sparrow API.
38+
*/
39+
std::pair<ArrowSchema, ArrowArray> CreateExampleArrowSchemaAndArrayBySparrow();
40+
41+
} // namespace iceberg::internal

test/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,6 @@ fetchcontent_declare(googletest
2323
GTest)
2424
fetchcontent_makeavailable(googletest)
2525

26+
add_subdirectory(arrow)
27+
add_subdirectory(avro)
2628
add_subdirectory(core)

test/arrow/CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Define and register the arrow_unittest executable only when ICEBERG_ARROW is set.
if(ICEBERG_ARROW)
  add_executable(arrow_unittest arrow_test.cc)
  target_include_directories(arrow_unittest PRIVATE "${ICEBERG_INCLUDES}")
  target_link_libraries(arrow_unittest
                        PRIVATE iceberg_arrow_static
                                Arrow::arrow_static
                                GTest::gtest_main)
  add_test(NAME arrow_unittest COMMAND arrow_unittest)
endif()

test/arrow/arrow_test.cc

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <arrow/api.h>
21+
#include <arrow/c/bridge.h>
22+
#include <arrow/result.h>
23+
#include <gtest/gtest.h>
24+
25+
#include "iceberg/arrow_c_data_internal.h"
26+
27+
namespace iceberg {
28+
29+
namespace {
30+
31+
// Imports the given C structures into Arrow C++ and checks that they describe
// the expected example data: an "id" INT64 non-nullable field and a "name"
// STRING nullable field, with three rows (1, "a"), (2, "b"), (3, "c").
//
// Both `schema` and `array` are handed to the Arrow import functions. Failures
// are reported as test failures rather than aborting the process, and counts
// that guard later indexing use fatal assertions so that a mismatch cannot lead
// to an out-of-range field or column access.
void CheckArrowSchemaAndArray(ArrowSchema* schema, ArrowArray* array) {
  auto schema_result = ::arrow::ImportSchema(schema);
  if (!schema_result.ok()) {
    // The array was never imported, so release it here to avoid leaking it.
    if (array->release != nullptr) {
      array->release(array);
    }
    FAIL() << schema_result.status().ToString();
  }
  auto arrow_schema = *schema_result;

  // Import the batch before any fatal assertion so the array is always consumed.
  auto batch_result = ::arrow::ImportRecordBatch(array, arrow_schema);
  ASSERT_TRUE(batch_result.ok()) << batch_result.status().ToString();
  auto arrow_record_batch = *batch_result;

  ASSERT_EQ(arrow_schema->num_fields(), 2);

  auto id_field = arrow_schema->field(0);
  EXPECT_EQ(id_field->name(), "id");
  EXPECT_EQ(id_field->type()->id(), ::arrow::Type::INT64);
  EXPECT_FALSE(id_field->nullable());

  auto name_field = arrow_schema->field(1);
  EXPECT_EQ(name_field->name(), "name");
  EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING);
  EXPECT_TRUE(name_field->nullable());

  constexpr int64_t kNumRows = 3;
  ASSERT_EQ(arrow_record_batch->num_rows(), kNumRows);
  ASSERT_EQ(arrow_record_batch->num_columns(), 2);

  auto id_column = arrow_record_batch->column(0);
  EXPECT_EQ(id_column->type()->id(), ::arrow::Type::INT64);
  constexpr const char* kExpectedIds[kNumRows] = {"1", "2", "3"};
  for (int64_t i = 0; i < kNumRows; ++i) {
    auto scalar = id_column->GetScalar(i);
    ASSERT_TRUE(scalar.ok()) << scalar.status().ToString();
    EXPECT_EQ((*scalar)->ToString(), kExpectedIds[i]);
  }

  auto name_column = arrow_record_batch->column(1);
  EXPECT_EQ(name_column->type()->id(), ::arrow::Type::STRING);
  constexpr const char* kExpectedNames[kNumRows] = {"a", "b", "c"};
  for (int64_t i = 0; i < kNumRows; ++i) {
    auto scalar = name_column->GetScalar(i);
    ASSERT_TRUE(scalar.ok()) << scalar.status().ToString();
    EXPECT_EQ((*scalar)->ToString(), kExpectedNames[i]);
  }
}
61+
62+
} // namespace
63+
64+
// Verifies the example schema and array produced by the nanoarrow-based builder.
TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) {
  auto exported = internal::CreateExampleArrowSchemaAndArrayByNanoarrow();
  CheckArrowSchemaAndArray(&exported.first, &exported.second);
}
68+
69+
// Verifies the example schema and array produced by the sparrow-based builder.
TEST(ArrowCDataTest, CheckArrowSchemaAndArrayBySparrow) {
  auto exported = internal::CreateExampleArrowSchemaAndArrayBySparrow();
  CheckArrowSchemaAndArray(&exported.first, &exported.second);
}
73+
74+
} // namespace iceberg

test/avro/CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Define and register the avro_unittest executable only when ICEBERG_AVRO is set.
if(ICEBERG_AVRO)
  add_executable(avro_unittest avro_test.cc)
  target_include_directories(avro_unittest PRIVATE "${ICEBERG_INCLUDES}")
  target_link_libraries(avro_unittest PRIVATE iceberg_avro_static
                                              GTest::gtest_main)
  add_test(NAME avro_unittest COMMAND avro_unittest)
endif()
File renamed without changes.

0 commit comments

Comments
 (0)