|
19 | 19 |
|
20 | 20 | #include <arrow/array/array_base.h> |
21 | 21 | #include <arrow/c/bridge.h> |
22 | | -#include <arrow/c/helpers.h> |
23 | 22 | #include <arrow/filesystem/localfs.h> |
24 | 23 | #include <arrow/io/file.h> |
25 | 24 | #include <arrow/json/from_string.h> |
|
31 | 30 |
|
32 | 31 | #include "iceberg/arrow/arrow_fs_file_io_internal.h" |
33 | 32 | #include "iceberg/avro/avro_register.h" |
| 33 | +#include "iceberg/avro/avro_writer.h" |
34 | 34 | #include "iceberg/file_reader.h" |
35 | 35 | #include "iceberg/schema.h" |
| 36 | +#include "iceberg/schema_internal.h" |
36 | 37 | #include "iceberg/type.h" |
37 | 38 | #include "matchers.h" |
38 | 39 | #include "temp_file_test_base.h" |
@@ -105,6 +106,46 @@ class AvroReaderTest : public TempFileTestBase { |
105 | 106 | ASSERT_FALSE(data.value().has_value()); |
106 | 107 | } |
107 | 108 |
|
| 109 | + void WriteAndVerify(std::shared_ptr<Schema> schema, |
| 110 | + const std::string& expected_string) { |
| 111 | + ArrowSchema arrow_c_schema; |
| 112 | + ASSERT_THAT(ToArrowSchema(*schema, &arrow_c_schema), IsOk()); |
| 113 | + |
| 114 | + auto arrow_schema_result = ::arrow::ImportType(&arrow_c_schema); |
| 115 | + ASSERT_TRUE(arrow_schema_result.ok()); |
| 116 | + auto arrow_schema = arrow_schema_result.ValueOrDie(); |
| 117 | + |
| 118 | + auto array_result = ::arrow::json::ArrayFromJSONString(arrow_schema, expected_string); |
| 119 | + ASSERT_TRUE(array_result.ok()); |
| 120 | + auto array = array_result.ValueOrDie(); |
| 121 | + |
| 122 | + struct ArrowArray arrow_array; |
| 123 | + auto export_result = ::arrow::ExportArray(*array, &arrow_array); |
| 124 | + ASSERT_TRUE(export_result.ok()); |
| 125 | + |
| 126 | + auto writer_result = WriterFactoryRegistry::Open( |
| 127 | + FileFormatType::kAvro, |
| 128 | + {.path = temp_avro_file_, .schema = schema, .io = file_io_}); |
| 129 | + ASSERT_TRUE(writer_result.has_value()); |
| 130 | + auto writer = std::move(writer_result.value()); |
| 131 | + ASSERT_THAT(writer->Write(arrow_array), IsOk()); |
| 132 | + ASSERT_THAT(writer->Close(), IsOk()); |
| 133 | + |
| 134 | + auto file_info_result = local_fs_->GetFileInfo(temp_avro_file_); |
| 135 | + ASSERT_TRUE(file_info_result.ok()); |
| 136 | + ASSERT_EQ(file_info_result->size(), writer->length().value()); |
| 137 | + |
| 138 | + auto reader_result = ReaderFactoryRegistry::Open(FileFormatType::kAvro, |
| 139 | + {.path = temp_avro_file_, |
| 140 | + .length = file_info_result->size(), |
| 141 | + .io = file_io_, |
| 142 | + .projection = schema}); |
| 143 | + ASSERT_THAT(reader_result, IsOk()); |
| 144 | + auto reader = std::move(reader_result.value()); |
| 145 | + ASSERT_NO_FATAL_FAILURE(VerifyNextBatch(*reader, expected_string)); |
| 146 | + ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader)); |
| 147 | + } |
| 148 | + |
108 | 149 | std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; |
109 | 150 | std::shared_ptr<FileIO> file_io_; |
110 | 151 | std::string temp_avro_file_; |
@@ -161,4 +202,28 @@ TEST_F(AvroReaderTest, ReadWithBatchSize) { |
161 | 202 | ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader)); |
162 | 203 | } |
163 | 204 |
|
| 205 | +TEST_F(AvroReaderTest, AvroWriterBasicType) { |
| 206 | + auto schema = std::make_shared<iceberg::Schema>(std::vector<SchemaField>{ |
| 207 | + SchemaField::MakeRequired(1, "name", std::make_shared<StringType>())}); |
| 208 | + |
| 209 | + std::string expected_string = R"([["Hello"], ["世界"], ["nanoarrow"]])"; |
| 210 | + |
| 211 | + WriteAndVerify(schema, expected_string); |
| 212 | +} |
| 213 | + |
| 214 | +TEST_F(AvroReaderTest, AvroWriterNestedType) { |
| 215 | + auto schema = std::make_shared<iceberg::Schema>(std::vector<SchemaField>{ |
| 216 | + SchemaField::MakeRequired(1, "id", std::make_shared<IntType>()), |
| 217 | + SchemaField::MakeRequired( |
| 218 | + 2, "info", |
| 219 | + std::make_shared<iceberg::StructType>(std::vector<SchemaField>{ |
| 220 | + SchemaField::MakeRequired(3, "name", std::make_shared<StringType>()), |
| 221 | + SchemaField::MakeRequired(4, "age", std::make_shared<IntType>())}))}); |
| 222 | + |
| 223 | + std::string expected_string = |
| 224 | + R"([[1, ["Alice", 25]], [2, ["Bob", 30]], [3, ["Ivy", 35]]])"; |
| 225 | + |
| 226 | + WriteAndVerify(schema, expected_string); |
| 227 | +} |
| 228 | + |
164 | 229 | } // namespace iceberg::avro |
0 commit comments