|
27 | 27 | #include <avro/Generic.hh> |
28 | 28 | #include <avro/GenericDatum.hh> |
29 | 29 | #include <gtest/gtest.h> |
30 | | -#include <nanoarrow/nanoarrow.hpp> |
31 | 30 |
|
32 | 31 | #include "iceberg/arrow/arrow_fs_file_io_internal.h" |
33 | 32 | #include "iceberg/avro/avro_register.h" |
@@ -107,27 +106,41 @@ class AvroReaderTest : public TempFileTestBase { |
107 | 106 | ASSERT_FALSE(data.value().has_value()); |
108 | 107 | } |
109 | 108 |
|
110 | | - void WriteAndVerify(std::shared_ptr<Schema> schema, const std::string& expected_string, |
111 | | - ArrowArray array) { |
112 | | - iceberg::WriterOptions options; |
113 | | - options.schema = schema; |
114 | | - options.path = temp_avro_file_; |
115 | | - options.io = file_io_; |
| 109 | + void WriteAndVerify(std::shared_ptr<Schema> schema, |
| 110 | + const std::string& expected_string) { |
| 111 | + ArrowSchema arrow_c_schema; |
| 112 | + ASSERT_THAT(ToArrowSchema(*schema, &arrow_c_schema), IsOk()); |
116 | 113 |
|
117 | | - auto writer_result = |
118 | | - iceberg::WriterFactoryRegistry::Open(iceberg::FileFormatType::kAvro, options); |
| 114 | + auto arrow_schema_result = ::arrow::ImportType(&arrow_c_schema); |
| 115 | + ASSERT_TRUE(arrow_schema_result.ok()); |
| 116 | + auto arrow_schema = arrow_schema_result.ValueOrDie(); |
| 117 | + |
| 118 | + auto array_result = ::arrow::json::ArrayFromJSONString(arrow_schema, expected_string); |
| 119 | + ASSERT_TRUE(array_result.ok()); |
| 120 | + auto array = array_result.ValueOrDie(); |
| 121 | + |
| 122 | + struct ArrowArray arrow_array; |
| 123 | + auto export_result = ::arrow::ExportArray(*array, &arrow_array); |
| 124 | + ASSERT_TRUE(export_result.ok()); |
| 125 | + |
| 126 | + auto writer_result = WriterFactoryRegistry::Open( |
| 127 | + FileFormatType::kAvro, |
| 128 | + {.path = temp_avro_file_, .io = file_io_, .schema = schema}); |
119 | 129 | ASSERT_TRUE(writer_result.has_value()); |
120 | | - auto writer = std::move(writer_result).value(); |
121 | | - ASSERT_THAT(writer->Write(array), IsOk()); |
| 130 | + auto writer = std::move(writer_result.value()); |
| 131 | + ASSERT_THAT(writer->Write(arrow_array), IsOk()); |
122 | 132 | ASSERT_THAT(writer->Close(), IsOk()); |
123 | 133 |
|
| 134 | + auto file_info_result = local_fs_->GetFileInfo(temp_avro_file_); |
| 135 | + ASSERT_TRUE(file_info_result.ok()); |
| 136 | + ASSERT_EQ(file_info_result->size(), writer->length().value()); |
| 137 | + |
124 | 138 | auto reader_result = ReaderFactoryRegistry::Open( |
125 | 139 | FileFormatType::kAvro, |
126 | 140 | {.path = temp_avro_file_, .io = file_io_, .projection = schema}); |
127 | 141 | ASSERT_THAT(reader_result, IsOk()); |
128 | 142 | auto reader = std::move(reader_result.value()); |
129 | 143 | ASSERT_NO_FATAL_FAILURE(VerifyNextBatch(*reader, expected_string)); |
130 | | - |
131 | 144 | ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader)); |
132 | 145 | } |
133 | 146 |
|
@@ -191,67 +204,24 @@ TEST_F(AvroReaderTest, AvroWriterBasicType) { |
191 | 204 | auto schema = std::make_shared<iceberg::Schema>(std::vector<SchemaField>{ |
192 | 205 | SchemaField::MakeRequired(1, "name", std::make_shared<StringType>())}); |
193 | 206 |
|
194 | | - ArrowSchema struct_schema; |
195 | | - ASSERT_THAT(ToArrowSchema(*schema, &struct_schema), IsOk()); |
196 | | - |
197 | | - ArrowArray array; |
198 | | - NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&array, &struct_schema, nullptr)); |
199 | | - NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(&array)); |
200 | | - |
201 | | - std::vector<std::string> str_values{"Hello", "世界", "nanoarrow"}; |
202 | 207 | std::string expected_string = R"([["Hello"], ["世界"], ["nanoarrow"]])"; |
203 | 208 |
|
204 | | - for (const auto& element : str_values) { |
205 | | - NANOARROW_THROW_NOT_OK( |
206 | | - ArrowArrayAppendString(array.children[0], ArrowCharView(element.c_str()))); |
207 | | - NANOARROW_THROW_NOT_OK(ArrowArrayFinishElement(&array)); |
208 | | - } |
209 | | - NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(&array, nullptr)); |
210 | | - |
211 | | - WriteAndVerify(schema, expected_string, array); |
212 | | - |
213 | | - ArrowSchemaRelease(&struct_schema); |
| 209 | + WriteAndVerify(schema, expected_string); |
214 | 210 | } |
215 | 211 |
|
216 | 212 | TEST_F(AvroReaderTest, AvroWriterNestedType) { |
217 | | - auto nested_schema = std::make_shared<iceberg::Schema>(std::vector<SchemaField>{ |
| 213 | + auto schema = std::make_shared<iceberg::Schema>(std::vector<SchemaField>{ |
218 | 214 | SchemaField::MakeRequired(1, "id", std::make_shared<IntType>()), |
219 | 215 | SchemaField::MakeRequired( |
220 | 216 | 2, "info", |
221 | 217 | std::make_shared<iceberg::StructType>(std::vector<SchemaField>{ |
222 | 218 | SchemaField::MakeRequired(3, "name", std::make_shared<StringType>()), |
223 | 219 | SchemaField::MakeRequired(4, "age", std::make_shared<IntType>())}))}); |
224 | 220 |
|
225 | | - ArrowSchema struct_schema; |
226 | | - ASSERT_THAT(ToArrowSchema(*nested_schema, &struct_schema), IsOk()); |
227 | | - |
228 | | - ArrowArray array; |
229 | | - NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(&array, &struct_schema, nullptr)); |
230 | | - |
231 | | - std::vector<int> int_array = {1, 2, 3}; |
232 | | - std::vector<std::pair<std::string, int>> info_array = { |
233 | | - {"Alice", 25}, {"Bob", 30}, {"Ivy", 35}}; |
234 | | - |
235 | 221 | std::string expected_string = |
236 | 222 | R"([[1, ["Alice", 25]], [2, ["Bob", 30]], [3, ["Ivy", 35]]])"; |
237 | 223 |
|
238 | | - NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(&array)); |
239 | | - |
240 | | - for (int i = 0; i < int_array.size(); i++) { |
241 | | - NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(array.children[0], int_array[i])); |
242 | | - |
243 | | - NANOARROW_THROW_NOT_OK(ArrowArrayAppendString( |
244 | | - array.children[1]->children[0], ArrowCharView(info_array[i].first.c_str()))); |
245 | | - NANOARROW_THROW_NOT_OK( |
246 | | - ArrowArrayAppendInt(array.children[1]->children[1], info_array[i].second)); |
247 | | - |
248 | | - NANOARROW_THROW_NOT_OK(ArrowArrayFinishElement(array.children[1])); |
249 | | - NANOARROW_THROW_NOT_OK(ArrowArrayFinishElement(&array)); |
250 | | - } |
251 | | - |
252 | | - NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(&array, nullptr)); |
253 | | - WriteAndVerify(nested_schema, expected_string, array); |
254 | | - ArrowSchemaRelease(&struct_schema); |
| 224 | + WriteAndVerify(schema, expected_string); |
255 | 225 | } |
256 | 226 |
|
257 | 227 | } // namespace iceberg::avro |
0 commit comments