Skip to content

Commit c40830e

Browse files
odysa and alamb authored
[Variant] test: add variant object tests with different sizes (apache#7896)
# Which issue does this PR close?

We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.

- Closes apache#7821.

# Rationale for this change

- [x] VariantObject with between 2^8 and 2^16 elements (field_id_size_minus_1 = 1, 2 byte field ids)
- [x] VariantObject with between 2^16 and 2^24 elements (field_id_size_minus_1 = 2, 3 byte field ids)
- [x] VariantObject with between 2^24 and 2^32 elements (field_id_size_minus_1 = 3, 4 byte field ids)
  - Inserting 2^24 + 1 elements takes too long.
- [x] VariantObject with total child data length between 2^8 and 2^16 elements (field_offset_size_minus_1 = 1, 2 byte field offsets)
- [x] VariantObject with total child data length between 2^16 and 2^24 elements (field_offset_size_minus_1 = 2, 3 byte field offsets)
- [x] VariantObject with total child data length between 2^24 and 2^32 elements (field_offset_size_minus_1 = 3, 4 byte field offsets)

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent b1a1864 commit c40830e

File tree

1 file changed

+112
-1
lines changed

1 file changed

+112
-1
lines changed

parquet-variant/src/variant/object.rs

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,13 +389,15 @@ impl<'m, 'v> VariantObject<'m, 'v> {
389389

390390
#[cfg(test)]
391391
mod tests {
392+
use crate::VariantBuilder;
393+
392394
use super::*;
393395

394396
#[test]
395397
fn test_variant_object_simple() {
396398
// Create metadata with field names: "age", "name", "active" (sorted)
397399
// Header: version=1, sorted=1, offset_size=1 (offset_size_minus_one=0)
398-
// So header byte = 00_0_1_0001 = 0x10
400+
// So header byte = 00_0_1_0001 = 0x11
399401
let metadata_bytes = vec![
400402
0b0001_0001,
401403
3, // dictionary size
@@ -607,4 +609,113 @@ mod tests {
607609
ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..16 from 15-byte buffer")
608610
));
609611
}
612+
613+
/// Builds an object with `count` fields named "0", "1", ..., `count - 1`, each storing
/// its own index as an `i32`, and verifies the result.
///
/// Checks that the object reports `count` fields, that the first and last fields
/// read back as the expected `Variant::Int32` values, and that the object header
/// records `expected_field_id_size` as the field ID width.
///
/// Panics (failing the calling test) if the built variant is not an object or if
/// any assertion fails. `count` must be at least 1 because the first and last
/// field names are looked up by index.
fn test_variant_object_with_count(count: i32, expected_field_id_size: OffsetSizeBytes) {
    let field_names: Vec<String> = (0..count).map(|n| n.to_string()).collect();

    // Register every field name with the builder before creating the object.
    let mut builder =
        VariantBuilder::new().with_field_names(field_names.iter().map(|name| name.as_str()));
    let mut object_builder = builder.new_object();

    // Pair each name with the integer it was generated from.
    for (name, field_value) in field_names.iter().zip(0..count) {
        object_builder.insert(name, field_value);
    }
    object_builder.finish().unwrap();

    let (metadata, value) = builder.finish();
    let obj = match Variant::new(&metadata, &value) {
        Variant::Object(obj) => obj,
        _ => panic!("Expected object variant"),
    };

    assert_eq!(obj.len(), count as usize);

    let first_name = &field_names[0];
    assert_eq!(obj.get(first_name).unwrap(), Variant::Int32(0));

    let last_value = count - 1;
    let last_name = &field_names[last_value as usize];
    assert_eq!(obj.get(last_name).unwrap(), Variant::Int32(last_value));

    assert_eq!(
        obj.header.field_id_size, expected_field_id_size,
        "Expected {}-byte field IDs, got {}-byte field IDs",
        expected_field_id_size as usize, obj.header.field_id_size as usize
    );
}
645+
646+
/// An object with 2^8 + 1 fields is expected to use 2-byte field IDs.
#[test]
fn test_variant_object_257_elements() {
    let count = 2_i32.pow(8) + 1;
    test_variant_object_with_count(count, OffsetSizeBytes::Two);
}
650+
651+
/// An object with 2^16 + 1 fields is expected to use 3-byte field IDs.
#[test]
fn test_variant_object_65537_elements() {
    let count = 2_i32.pow(16) + 1;
    test_variant_object_with_count(count, OffsetSizeBytes::Three);
}
656+
657+
/* Can't run this test now as it takes 45x longer than other tests
658+
#[test]
659+
fn test_variant_object_16777217_elements() {
660+
test_variant_object_with_count((1 << 24) + 1, OffsetSizeBytes::Four);
661+
// 2^24 + 1, expected 4-byte field IDs
662+
}
663+
*/
664+
665+
/// An object with 255 fields is expected to use 1-byte field IDs.
#[test]
fn test_variant_object_small_sizes_255_elements() {
    let count = i32::from(u8::MAX);
    test_variant_object_with_count(count, OffsetSizeBytes::One);
}
669+
670+
fn test_variant_object_with_large_data(
671+
data_size_per_field: usize,
672+
expected_field_offset_size: OffsetSizeBytes,
673+
) {
674+
let num_fields = 20;
675+
let mut builder = VariantBuilder::new();
676+
let mut obj = builder.new_object();
677+
678+
let str_val = "a".repeat(data_size_per_field);
679+
680+
for val in 0..num_fields {
681+
let key = format!("id_{val}");
682+
obj.insert(&key, str_val.as_str());
683+
}
684+
685+
obj.finish().unwrap();
686+
let (metadata, value) = builder.finish();
687+
let variant = Variant::new(&metadata, &value);
688+
689+
if let Variant::Object(obj) = variant {
690+
assert_eq!(obj.len(), num_fields);
691+
assert_eq!(
692+
obj.header.field_offset_size, expected_field_offset_size,
693+
"Expected {}-byte field offsets, got {}-byte field offsets",
694+
expected_field_offset_size as usize, obj.header.field_offset_size as usize
695+
);
696+
} else {
697+
panic!("Expected object variant");
698+
}
699+
}
700+
701+
/// With 10-byte string values per field, 1-byte field offsets are expected.
#[test]
fn test_variant_object_child_data_0_byte_offsets_minus_one() {
    let data_size_per_field = 10;
    test_variant_object_with_large_data(data_size_per_field, OffsetSizeBytes::One);
}
705+
706+
/// With 2^8 + 1 bytes of string data per field, 2-byte field offsets are expected.
///
/// NOTE(review): the function name mentions 3-byte offsets, but the assertion
/// below expects `OffsetSizeBytes::Two` — consider renaming.
#[test]
fn test_variant_object_256_bytes_child_data_3_byte_offsets() {
    let data_size_per_field = (1 << 8) + 1;
    test_variant_object_with_large_data(data_size_per_field, OffsetSizeBytes::Two);
}
710+
711+
/// With 2^16 + 1 bytes of string data per field, 3-byte field offsets are expected.
///
/// NOTE(review): the function name mentions 16777216 bytes and 4-byte offsets,
/// but the body uses 65536 + 1 bytes per field and expects
/// `OffsetSizeBytes::Three` — consider renaming.
#[test]
fn test_variant_object_16777216_bytes_child_data_4_byte_offsets() {
    let data_size_per_field = (1 << 16) + 1;
    test_variant_object_with_large_data(data_size_per_field, OffsetSizeBytes::Three);
}
715+
716+
/// With 2^24 + 1 bytes of string data per field, 4-byte field offsets are expected.
///
/// NOTE(review): the function name mentions 65535 bytes and 2-byte offsets,
/// but the body uses 16777216 + 1 bytes per field and expects
/// `OffsetSizeBytes::Four` — consider renaming.
#[test]
fn test_variant_object_65535_bytes_child_data_2_byte_offsets() {
    let data_size_per_field = (1 << 24) + 1;
    test_variant_object_with_large_data(data_size_per_field, OffsetSizeBytes::Four);
}
610721
}

0 commit comments

Comments
 (0)