Skip to content

Commit b355f6f

Browse files
committed
add more test cases and address comments
1 parent 90bbfd8 commit b355f6f

File tree

3 files changed

+152
-6
lines changed

3 files changed

+152
-6
lines changed

src/iceberg/avro/avro_data_util.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ Status AppendPrimitiveValueToBuilder(const ::avro::NodePtr& avro_node,
203203
ICEBERG_ARROW_RETURN_NOT_OK(
204204
builder->Append(static_cast<int64_t>(avro_datum.value<int32_t>())));
205205
} else {
206-
return InvalidArgument("Expected Avro long for long field, got: {}",
206+
return InvalidArgument("Expected Avro int/long for long field, got: {}",
207207
ToString(avro_node));
208208
}
209209
return {};
@@ -227,7 +227,7 @@ Status AppendPrimitiveValueToBuilder(const ::avro::NodePtr& avro_node,
227227
ICEBERG_ARROW_RETURN_NOT_OK(
228228
builder->Append(static_cast<double>(avro_datum.value<float>())));
229229
} else {
230-
return InvalidArgument("Expected Avro double for double field, got: {}",
230+
return InvalidArgument("Expected Avro float/double for double field, got: {}",
231231
ToString(avro_node));
232232
}
233233
return {};
@@ -351,8 +351,8 @@ Status AppendPrimitiveValueToBuilder(const ::avro::NodePtr& avro_node,
351351
}
352352

353353
default:
354-
return InvalidArgument("Unsupported primitive type: {}",
355-
projected_field.type()->ToString());
354+
return InvalidArgument("Unsupported primitive type {} to append avro node {}",
355+
projected_field.type()->ToString(), ToString(avro_node));
356356
}
357357
}
358358

src/iceberg/avro/avro_schema_util.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ Status ValidateAvroSchemaEvolution(const Type& expected_type,
501501
case TypeId::kTimestamp:
502502
if (avro_node->type() == ::avro::AVRO_LONG &&
503503
HasLogicalType(avro_node, ::avro::LogicalType::TIMESTAMP_MICROS) &&
504-
GetAdjustToUtc(avro_node).value_or("false") == "true") {
504+
GetAdjustToUtc(avro_node).value_or("false") == "false") {
505505
return {};
506506
}
507507
break;

test/avro_data_test.cc

Lines changed: 147 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
* under the License.
1818
*/
1919

20+
#include <ranges>
21+
2022
#include <arrow/c/bridge.h>
2123
#include <arrow/json/from_string.h>
2224
#include <avro/Compiler.hh>
@@ -178,6 +180,123 @@ const std::vector<AppendDatumParam> kPrimitiveTestCases = {
178180
.expected_json =
179181
R"([{"a": "test_string_0"}, {"a": "test_string_1"}, {"a": "test_string_2"}])",
180182
},
183+
{
184+
.name = "Binary",
185+
.projected_type = std::make_shared<BinaryType>(),
186+
.source_type = std::make_shared<BinaryType>(),
187+
.value_setter =
188+
[](::avro::GenericDatum& datum, int i) {
189+
datum.value<::avro::GenericRecord>()
190+
.fieldAt(0)
191+
.value<std::vector<uint8_t>>() = {static_cast<uint8_t>('a' + i),
192+
static_cast<uint8_t>('b' + i),
193+
static_cast<uint8_t>('c' + i)};
194+
},
195+
.expected_json = R"([{"a": "abc"}, {"a": "bcd"}, {"a": "cde"}])",
196+
},
197+
{
198+
.name = "Fixed",
199+
.projected_type = std::make_shared<FixedType>(4),
200+
.source_type = std::make_shared<FixedType>(4),
201+
.value_setter =
202+
[](::avro::GenericDatum& datum, int i) {
203+
datum.value<::avro::GenericRecord>()
204+
.fieldAt(0)
205+
.value<::avro::GenericFixed>()
206+
.value() = {
207+
static_cast<uint8_t>('a' + i), static_cast<uint8_t>('b' + i),
208+
static_cast<uint8_t>('c' + i), static_cast<uint8_t>('d' + i)};
209+
},
210+
.expected_json = R"([{"a": "abcd"}, {"a": "bcde"}, {"a": "cdef"}])",
211+
},
212+
/// FIXME: NotImplemented: MakeBuilder: cannot construct builder for type
213+
/// extension<arrow.uuid>
214+
// {
215+
// .name = "UUID",
216+
// .projected_type = std::make_shared<UuidType>(),
217+
// .source_type = std::make_shared<UuidType>(),
218+
// .value_setter =
219+
// [](::avro::GenericDatum& datum, int i) {
220+
// datum.value<::avro::GenericRecord>()
221+
// .fieldAt(0)
222+
// .value<::avro::GenericFixed>()
223+
// .value() = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
224+
// 'i', 'j', 'k', 'l', 'm', 'n', 'o',
225+
// static_cast<uint8_t>(i)};
226+
// },
227+
// .expected_json = R"([{"a": "abcdefghijklmnop"}, {"a": "bcdefghijklmnopq"},
228+
// {"a": "cdefghijklmnopqr"}])",
229+
// },
230+
{
231+
.name = "Decimal",
232+
.projected_type = std::make_shared<DecimalType>(10, 2),
233+
.source_type = std::make_shared<DecimalType>(10, 2),
234+
.value_setter =
235+
[](::avro::GenericDatum& datum, int i) {
236+
int32_t decimal_value = i * 1000 + i;
237+
std::vector<uint8_t>& fixed = datum.value<::avro::GenericRecord>()
238+
.fieldAt(0)
239+
.value<::avro::GenericFixed>()
240+
.value();
241+
// The byte array must contain the two's-complement representation of
242+
// the unscaled integer value in big-endian byte order.
243+
for (uint8_t& rvalue : std::ranges::reverse_view(fixed)) {
244+
rvalue = static_cast<uint8_t>(decimal_value & 0xFF);
245+
decimal_value >>= 8;
246+
}
247+
},
248+
.expected_json = R"([{"a": "0.00"}, {"a": "10.01"}, {"a": "20.02"}])",
249+
},
250+
{
251+
.name = "Date",
252+
.projected_type = std::make_shared<DateType>(),
253+
.source_type = std::make_shared<DateType>(),
254+
.value_setter =
255+
[](::avro::GenericDatum& datum, int i) {
256+
// Date as days since epoch (1970-01-01)
257+
// 0 = 1970-01-01, 1 = 1970-01-02, etc.
258+
datum.value<::avro::GenericRecord>().fieldAt(0).value<int32_t>() =
259+
18000 + i; // ~2019-04-11 + i days
260+
},
261+
.expected_json = R"([{"a": 18000}, {"a": 18001}, {"a": 18002}])",
262+
},
263+
{
264+
.name = "Time",
265+
.projected_type = std::make_shared<TimeType>(),
266+
.source_type = std::make_shared<TimeType>(),
267+
.value_setter =
268+
[](::avro::GenericDatum& datum, int i) {
269+
// Time as microseconds since midnight
270+
// 12:30:45.123456 + i seconds = 45045123456 + i*1000000 microseconds
271+
datum.value<::avro::GenericRecord>().fieldAt(0).value<int64_t>() =
272+
45045123456LL + i * 1000000LL;
273+
},
274+
.expected_json =
275+
R"([{"a": 45045123456}, {"a": 45046123456}, {"a": 45047123456}])",
276+
},
277+
{
278+
.name = "Timestamp",
279+
.projected_type = std::make_shared<TimestampType>(),
280+
.source_type = std::make_shared<TimestampType>(),
281+
.value_setter =
282+
[](::avro::GenericDatum& datum, int i) {
283+
datum.value<::avro::GenericRecord>().fieldAt(0).value<int64_t>() =
284+
i * 1000000LL;
285+
},
286+
.expected_json = R"([{"a": 0}, {"a": 1000000}, {"a": 2000000}])",
287+
},
288+
{
289+
.name = "TimestampTz",
290+
.projected_type = std::make_shared<TimestampTzType>(),
291+
.source_type = std::make_shared<TimestampTzType>(),
292+
.value_setter =
293+
[](::avro::GenericDatum& datum, int i) {
294+
datum.value<::avro::GenericRecord>().fieldAt(0).value<int64_t>() =
295+
1672531200000000LL + i * 1000000LL;
296+
},
297+
.expected_json =
298+
R"([{"a": 1672531200000000}, {"a": 1672531201000000}, {"a": 1672531202000000}])",
299+
},
181300
{
182301
.name = "IntToLongPromotion",
183302
.projected_type = std::make_shared<LongType>(),
@@ -188,7 +307,34 @@ const std::vector<AppendDatumParam> kPrimitiveTestCases = {
188307
},
189308
.expected_json = R"([{"a": 0}, {"a": 100}, {"a": 200}])",
190309
},
191-
// TODO(gangwu): add test cases for other types
310+
{
311+
.name = "FloatToDoublePromotion",
312+
.projected_type = std::make_shared<DoubleType>(),
313+
.source_type = std::make_shared<FloatType>(),
314+
.value_setter =
315+
[](::avro::GenericDatum& datum, int i) {
316+
datum.value<::avro::GenericRecord>().fieldAt(0).value<float>() = i * 1.0f;
317+
},
318+
.expected_json = R"([{"a": 0.0}, {"a": 1.0}, {"a": 2.0}])",
319+
},
320+
{
321+
.name = "DecimalPrecisionPromotion",
322+
.projected_type = std::make_shared<DecimalType>(10, 2),
323+
.source_type = std::make_shared<DecimalType>(6, 2),
324+
.value_setter =
325+
[](::avro::GenericDatum& datum, int i) {
326+
int32_t decimal_value = i * 1000 + i;
327+
std::vector<uint8_t>& fixed = datum.value<::avro::GenericRecord>()
328+
.fieldAt(0)
329+
.value<::avro::GenericFixed>()
330+
.value();
331+
for (uint8_t& rvalue : std::ranges::reverse_view(fixed)) {
332+
rvalue = static_cast<uint8_t>(decimal_value & 0xFF);
333+
decimal_value >>= 8;
334+
}
335+
},
336+
.expected_json = R"([{"a": "0.00"}, {"a": "10.01"}, {"a": "20.02"}])",
337+
},
192338
};
193339

194340
INSTANTIATE_TEST_SUITE_P(AllPrimitiveTypes, AppendDatumToBuilderTest,

0 commit comments

Comments
 (0)