Skip to content

Commit 3039e80

Browse files
committed
fix some review comments
1 parent 7d8b2f4 commit 3039e80

File tree

6 files changed

+25
-37
lines changed

6 files changed

+25
-37
lines changed

cmake_modules/IcebergBuildUtils.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ function(add_iceberg_lib LIB_NAME)
131131
endif()
132132

133133
if(LIB_INCLUDES)
134-
target_include_directories(${LIB_NAME}_shared SYSTEM PUBLIC ${ARG_EXTRA_INCLUDES})
134+
target_include_directories(${LIB_NAME}_shared PUBLIC ${ARG_EXTRA_INCLUDES})
135135
endif()
136136

137137
if(ARG_PRIVATE_INCLUDES)
@@ -179,7 +179,7 @@ function(add_iceberg_lib LIB_NAME)
179179
endif()
180180

181181
if(LIB_INCLUDES)
182-
target_include_directories(${LIB_NAME}_static SYSTEM PUBLIC ${ARG_EXTRA_INCLUDES})
182+
target_include_directories(${LIB_NAME}_static PUBLIC ${ARG_EXTRA_INCLUDES})
183183
endif()
184184

185185
if(ARG_PRIVATE_INCLUDES)

src/iceberg/parquet/parquet_reader.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,12 @@ class ParquetReader::Impl {
237237
std::shared_ptr<::iceberg::Schema> read_schema_;
238238
// The projection result to apply to the read schema.
239239
SchemaProjection projection_;
240+
// The input stream to read Parquet file.
241+
std::shared_ptr<::arrow::io::RandomAccessFile> input_stream_;
240242
// Parquet file reader to create RecordBatchReader.
241243
std::unique_ptr<::parquet::arrow::FileReader> reader_;
242244
// The context to keep track of the reading progress.
243245
std::unique_ptr<ReadContext> context_;
244-
// The input stream to read Parquet file.
245-
std::shared_ptr<::arrow::io::RandomAccessFile> input_stream_;
246246
};
247247

248248
ParquetReader::~ParquetReader() = default;

src/iceberg/parquet/parquet_writer.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,9 @@ class ParquetWriter::Impl {
6666
auto schema_node = std::static_pointer_cast<::parquet::schema::GroupNode>(
6767
schema_descriptor->schema_root());
6868

69-
std::shared_ptr<::arrow::KeyValueMetadata> metadata =
70-
::arrow::key_value_metadata(options.properties);
71-
7269
ICEBERG_ASSIGN_OR_RAISE(output_stream_, OpenOutputStream(options));
7370
auto file_writer = ::parquet::ParquetFileWriter::Open(output_stream_, schema_node,
74-
writer_properties, metadata);
71+
writer_properties);
7572
ICEBERG_ARROW_RETURN_NOT_OK(::parquet::arrow::FileWriter::Make(
7673
pool_, std::move(file_writer), arrow_schema_, arrow_writer_properties, &writer_));
7774

@@ -106,10 +103,10 @@ class ParquetWriter::Impl {
106103
::arrow::MemoryPool* pool_ = ::arrow::default_memory_pool();
107104
// Schema to write from the Parquet file.
108105
std::shared_ptr<::arrow::Schema> arrow_schema_;
109-
// Parquet file writer to write ArrowArray.
110-
std::unique_ptr<::parquet::arrow::FileWriter> writer_;
111106
// The output stream to write Parquet file.
112107
std::shared_ptr<::arrow::io::OutputStream> output_stream_;
108+
// Parquet file writer to write ArrowArray.
109+
std::unique_ptr<::parquet::arrow::FileWriter> writer_;
113110
};
114111

115112
ParquetWriter::~ParquetWriter() = default;

src/iceberg/schema_internal.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <optional>
2525
#include <string>
2626

27+
#include "iceberg/constants.h"
2728
#include "iceberg/schema.h"
2829
#include "iceberg/type.h"
2930
#include "iceberg/util/macros.h"
@@ -45,7 +46,7 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n
4546
NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&metadata_buffer, nullptr));
4647
if (field_id.has_value()) {
4748
NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend(
48-
&metadata_buffer, ArrowCharView(std::string(kFieldIdKey).c_str()),
49+
&metadata_buffer, ArrowCharView(std::string(kParquetFieldIdKey).c_str()),
4950
ArrowCharView(std::to_string(field_id.value()).c_str())));
5051
}
5152

@@ -185,8 +186,8 @@ int32_t GetFieldId(const ArrowSchema& schema) {
185186
return kUnknownFieldId;
186187
}
187188

188-
ArrowStringView field_id_key{.data = kFieldIdKey.data(),
189-
.size_bytes = kFieldIdKey.size()};
189+
ArrowStringView field_id_key{.data = kParquetFieldIdKey.data(),
190+
.size_bytes = kParquetFieldIdKey.size()};
190191
ArrowStringView field_id_value;
191192
if (ArrowMetadataGetValue(schema.metadata, field_id_key, &field_id_value) !=
192193
NANOARROW_OK) {

src/iceberg/schema_internal.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,12 @@
2424

2525
#include <nanoarrow/nanoarrow.h>
2626

27-
#include "iceberg/constants.h"
2827
#include "iceberg/iceberg_export.h"
2928
#include "iceberg/result.h"
3029
#include "iceberg/type_fwd.h"
3130

3231
namespace iceberg {
3332

34-
// Apache Arrow C++ uses "PARQUET:field_id" to store field IDs for Parquet.
35-
// Here we follow a similar convention for Iceberg but we might also add
36-
// "PARQUET:field_id" in the future once we implement a Parquet writer.
37-
constexpr std::string_view kFieldIdKey = kParquetFieldIdKey;
38-
3933
/// \brief Convert an Iceberg schema to an Arrow schema.
4034
///
4135
/// \param[in] schema The Iceberg schema to convert.

test/parquet_test.cc

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,19 @@
2626
#include <arrow/table.h>
2727
#include <arrow/type.h>
2828
#include <arrow/util/key_value_metadata.h>
29-
#include <iceberg/file_reader.h>
30-
#include <iceberg/file_writer.h>
31-
#include <iceberg/result.h>
32-
#include <iceberg/schema_field.h>
33-
#include <iceberg/schema_internal.h>
3429
#include <parquet/arrow/reader.h>
3530
#include <parquet/arrow/writer.h>
3631
#include <parquet/metadata.h>
3732

3833
#include "iceberg/arrow/arrow_error_transform_internal.h"
3934
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
35+
#include "iceberg/file_reader.h"
36+
#include "iceberg/file_writer.h"
4037
#include "iceberg/parquet/parquet_register.h"
38+
#include "iceberg/result.h"
4139
#include "iceberg/schema.h"
40+
#include "iceberg/schema_field.h"
41+
#include "iceberg/schema_internal.h"
4242
#include "iceberg/type.h"
4343
#include "iceberg/util/checked_cast.h"
4444
#include "iceberg/util/macros.h"
@@ -318,13 +318,10 @@ TEST_F(ParquetReadWrite, SimpleStructRoundTrip) {
318318
ASSERT_THAT(ToArrowSchema(*schema, &arrow_c_schema), IsOk());
319319
auto arrow_schema = ::arrow::ImportType(&arrow_c_schema).ValueOrDie();
320320

321-
auto array =
322-
::arrow::json::ArrayFromJSONString(::arrow::struct_(arrow_schema->fields()),
323-
R"([[{"a1": 1, "a2": "abc"}],
324-
[{"a1": 0}],
325-
[{"a2": "edf"}],
326-
[{}]])")
327-
.ValueOrDie();
321+
auto array = ::arrow::json::ArrayFromJSONString(
322+
::arrow::struct_(arrow_schema->fields()),
323+
R"([[{"a1": 1, "a2": "abc"}], [{"a1": 0}], [{"a2": "edf"}], [{}]])")
324+
.ValueOrDie();
328325

329326
std::shared_ptr<::arrow::Array> out;
330327
DoRoundtrip(array, schema, &out);
@@ -348,12 +345,11 @@ TEST_F(ParquetReadWrite, SimpleTypeRoundTrip) {
348345
ASSERT_THAT(ToArrowSchema(*schema, &arrow_c_schema), IsOk());
349346
auto arrow_schema = ::arrow::ImportType(&arrow_c_schema).ValueOrDie();
350347

351-
auto array = ::arrow::json::ArrayFromJSONString(
352-
::arrow::struct_(arrow_schema->fields()),
353-
R"([[true, 1, 2, 1.1, 1.2, "abc", 44614000, 1756696503000000],
354-
[false, 0, 0, 0, 0, "", 0, 0],
355-
[null, null, null, null, null, null, null, null]])")
356-
.ValueOrDie();
348+
auto array =
349+
::arrow::json::ArrayFromJSONString(
350+
::arrow::struct_(arrow_schema->fields()),
351+
R"([[true, 1, 2, 1.1, 1.2, "abc", 44614000, 1756696503000000], [false, 0, 0, 0, 0, "", 0, 0], [null, null, null, null, null, null, null, null]])")
352+
.ValueOrDie();
357353

358354
std::shared_ptr<::arrow::Array> out;
359355
DoRoundtrip(array, schema, &out);

0 commit comments

Comments
 (0)