From 4ab42c4a4d95e2fcee9835fb7cfb8bf60e5c5175 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Sun, 31 Aug 2025 21:57:18 +0800 Subject: [PATCH 1/2] fix: simplify Arrow array building using parent array operations - Replace individual child array management with parent array StartAppending/FinishElement pattern for cleaner code. --- src/iceberg/arrow_c_data_internal.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc index ec9068bdc..1a931af77 100644 --- a/src/iceberg/arrow_c_data_internal.cc +++ b/src/iceberg/arrow_c_data_internal.cc @@ -57,20 +57,16 @@ std::pair CreateExampleArrowSchemaAndArrayByNanoarrow() ArrowArray* int64_array = out_array.children[0]; ArrowArray* string_array = out_array.children[1]; - NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(int64_array)); - NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(string_array)); + NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(&out_array)); for (int64_t i = 0; i < kNumValues; i++) { NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i])); NANOARROW_THROW_NOT_OK( ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str()))); + NANOARROW_THROW_NOT_OK(ArrowArrayFinishElement(&out_array)); } - NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(int64_array, nullptr)); - NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(string_array, nullptr)); - - out_array.length = kNumValues; - out_array.null_count = 0; + NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(&out_array, nullptr)); return {out_schema, out_array}; } From 43c8560259bc0257cedcbb3de9c58d0633ae1f79 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Mon, 1 Sep 2025 16:16:34 +0800 Subject: [PATCH 2/2] refactor: remove unused arrow_c_data_internal files - Delete unused nanoarrow example code and test files - Clean up dependencies and remove obsolete test cases --- src/iceberg/CMakeLists.txt | 1 - src/iceberg/arrow_c_data_internal.cc | 81 ---------------------------- src/iceberg/arrow_c_data_internal.h | 33 ------------ test/arrow_test.cc | 34 ------------ 4 files changed, 149 deletions(-) delete mode 100644 src/iceberg/arrow_c_data_internal.cc delete mode 100644 src/iceberg/arrow_c_data_internal.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index b509c2579..bccd4d9d7 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -18,7 +18,6 @@ set(ICEBERG_INCLUDES "$" "$") set(ICEBERG_SOURCES - arrow_c_data_internal.cc catalog/in_memory_catalog.cc expression/expression.cc expression/literal.cc diff --git a/src/iceberg/arrow_c_data_internal.cc b/src/iceberg/arrow_c_data_internal.cc deleted file mode 100644 index 1a931af77..000000000 --- a/src/iceberg/arrow_c_data_internal.cc +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "iceberg/arrow_c_data_internal.h" - -#include -#include -#include - -#include - -namespace iceberg::internal { - -std::pair CreateExampleArrowSchemaAndArrayByNanoarrow() { - ArrowSchema out_schema; - - // Initializes the root struct schema - NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(&out_schema, NANOARROW_TYPE_STRUCT)); - NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(&out_schema, 2)); - - // Set up the non-nullable int64 field - struct ArrowSchema* int64_field = out_schema.children[0]; - ArrowSchemaInit(int64_field); - NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(int64_field, NANOARROW_TYPE_INT64)); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(int64_field, "id")); - int64_field->flags &= ~ARROW_FLAG_NULLABLE; - - // Set up the nullable string field - struct ArrowSchema* string_field = out_schema.children[1]; - ArrowSchemaInit(string_field); - NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(string_field, NANOARROW_TYPE_STRING)); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(string_field, "name")); - string_field->flags |= ARROW_FLAG_NULLABLE; - - constexpr int64_t kNumValues = 3; - std::array int64_values = {1, 2, 3}; - std::array string_values = {"a", "b", "c"}; - - ArrowArray out_array; - NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&out_array, &out_schema, nullptr)); - ArrowArray* int64_array = out_array.children[0]; - ArrowArray* string_array = out_array.children[1]; - - NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(&out_array)); - - for (int64_t i = 0; i < kNumValues; i++) { - NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(int64_array, int64_values[i])); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendString(string_array, ArrowCharView(string_values[i].c_str()))); - NANOARROW_THROW_NOT_OK(ArrowArrayFinishElement(&out_array)); - } - - NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(&out_array, nullptr)); - - return {out_schema, out_array}; -} - -void TestNlohmannJsonCompile() { - nlohmann::json j; - j["name"] = "foo"; - j["age"] = 30; - j["city"] = "New York"; -} - -} // namespace iceberg::internal diff --git a/src/iceberg/arrow_c_data_internal.h b/src/iceberg/arrow_c_data_internal.h deleted file mode 100644 index 2d913c55e..000000000 --- a/src/iceberg/arrow_c_data_internal.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include - -namespace iceberg::internal { - -/** - * @brief Create a simple schema with non-nullable int64 and nullable string fields. - * - * This is the example code to demonstrate the usage of nanoarrow API. - */ -std::pair CreateExampleArrowSchemaAndArrayByNanoarrow(); - -} // namespace iceberg::internal diff --git a/test/arrow_test.cc b/test/arrow_test.cc index 1f13cb6d9..202463df0 100644 --- a/test/arrow_test.cc +++ b/test/arrow_test.cc @@ -28,46 +28,12 @@ #include #include -#include "iceberg/arrow_c_data_internal.h" #include "iceberg/schema.h" #include "iceberg/schema_internal.h" #include "matchers.h" namespace iceberg { -TEST(ArrowCDataTest, CheckArrowSchemaAndArrayByNanoarrow) { - auto [schema, array] = internal::CreateExampleArrowSchemaAndArrayByNanoarrow(); - - auto arrow_schema = ::arrow::ImportSchema(&schema).ValueOrDie(); - EXPECT_EQ(arrow_schema->num_fields(), 2); - - auto id_field = arrow_schema->field(0); - EXPECT_EQ(id_field->name(), "id"); - EXPECT_EQ(id_field->type()->id(), ::arrow::Type::INT64); - EXPECT_FALSE(id_field->nullable()); - - auto name_field = arrow_schema->field(1); - EXPECT_EQ(name_field->name(), "name"); - EXPECT_EQ(name_field->type()->id(), ::arrow::Type::STRING); - EXPECT_TRUE(name_field->nullable()); - - auto arrow_record_batch = ::arrow::ImportRecordBatch(&array, arrow_schema).ValueOrDie(); - EXPECT_EQ(arrow_record_batch->num_rows(), 3); - EXPECT_EQ(arrow_record_batch->num_columns(), 2); - - auto id_column = arrow_record_batch->column(0); - EXPECT_EQ(id_column->type()->id(), ::arrow::Type::INT64); - EXPECT_EQ(id_column->GetScalar(0).ValueOrDie()->ToString(), "1"); - EXPECT_EQ(id_column->GetScalar(1).ValueOrDie()->ToString(), "2"); - EXPECT_EQ(id_column->GetScalar(2).ValueOrDie()->ToString(), "3"); - - auto name_column = arrow_record_batch->column(1); - EXPECT_EQ(name_column->type()->id(), ::arrow::Type::STRING); - EXPECT_EQ(name_column->GetScalar(0).ValueOrDie()->ToString(), "a"); - EXPECT_EQ(name_column->GetScalar(1).ValueOrDie()->ToString(), "b"); - EXPECT_EQ(name_column->GetScalar(2).ValueOrDie()->ToString(), "c"); -} - struct ToArrowSchemaParam { std::shared_ptr iceberg_type; bool optional = true;