Skip to content
Open
Changes from 3 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
9c25cc4
Add test shredded variant list array
sdf-jkl Sep 15, 2025
ed961a4
Add basic tests
sdf-jkl Sep 16, 2025
03ecb95
Merge branch 'apache:main' into shredded_list_support
sdf-jkl Sep 16, 2025
158d6d7
Merge branch 'apache:main' into shredded_list_support
sdf-jkl Sep 16, 2025
d53c831
Redo test shredded array
sdf-jkl Sep 17, 2025
174e429
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Sep 18, 2025
69de7d7
Rebuild the shredded list array
sdf-jkl Sep 19, 2025
cc6d787
Use select::take to build the output array
sdf-jkl Sep 23, 2025
8f6ad1b
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Sep 23, 2025
bc8abd9
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Sep 25, 2025
c0d2065
Pass one test
sdf-jkl Sep 25, 2025
85aaa3f
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Sep 25, 2025
40b6311
Get typed values directly
sdf-jkl Sep 25, 2025
f6e88ef
Added support for utf8, largeUtf8, utf8view
sdf-jkl Oct 13, 2025
61ed178
added tests for utf8, largeUtf8, utf8view
sdf-jkl Oct 13, 2025
1fb612d
fix tests
sdf-jkl Oct 13, 2025
2b6d280
Update parquet-variant-compute/src/variant_to_arrow.rs
sdf-jkl Oct 14, 2025
398b52d
Merge branch 'main' into variant_to_arrow_utf8
sdf-jkl Oct 14, 2025
defa07b
Update parquet-variant-compute/src/variant_to_arrow.rs
sdf-jkl Oct 20, 2025
5022acd
Support LargeUtf8, Utf8-View
sdf-jkl Oct 20, 2025
ed66007
Merge branch 'main' into variant_to_arrow_utf8
sdf-jkl Oct 20, 2025
196b5d4
Fix Merge errors
sdf-jkl Oct 20, 2025
642d192
Update arrow-array/src/builder/generic_bytes_builder.rs
sdf-jkl Oct 20, 2025
76b3c80
Add docs for AVERAGE_STRING_LENGTH const
sdf-jkl Oct 21, 2025
35785d6
Merge branch 'variant_to_arrow_utf8' of https://github.com/sdf-jkl/ar…
sdf-jkl Oct 21, 2025
5914218
cargo fmt
sdf-jkl Oct 21, 2025
216d401
cargo fmt
sdf-jkl Oct 21, 2025
3aa6cf3
Merge branch 'variant_to_arrow_utf8' into shredded_list_support
sdf-jkl Oct 22, 2025
15fc8be
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Oct 22, 2025
04b9941
Quick fix variant_get
sdf-jkl Oct 24, 2025
857f0e2
Merge branch 'main' into shredded_list_support
sdf-jkl Oct 24, 2025
1edfeca
Merge branch 'main' into shredded_list_support
sdf-jkl Nov 11, 2025
6d6793d
fix merge errors
sdf-jkl Nov 11, 2025
da528c2
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Dec 3, 2025
71412b8
Merge branch 'main' into shredded_list_support
sdf-jkl Dec 3, 2025
0c32647
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Feb 19, 2026
5b899d8
Merge branch 'shredded_list_support' of https://github.com/sdf-jkl/ar…
sdf-jkl Feb 19, 2026
9cd01d2
Simplify tests using shred_variant
sdf-jkl Feb 20, 2026
cecd39f
Add tests suggested by @klion26
sdf-jkl Feb 21, 2026
a776982
Fix typed and untyped values logic
sdf-jkl Feb 21, 2026
cfe7c00
Add support for LargeListArray + OBB err when safe_cast
sdf-jkl Feb 21, 2026
fc99bf0
Use ShreddingState instead of BorrowedShreddingState in ShreddedPathS…
sdf-jkl Feb 21, 2026
ccbf59b
Reuse ShreddingState methods
sdf-jkl Feb 23, 2026
cbfa058
nit fix
sdf-jkl Feb 25, 2026
cf94d43
use else if chain
sdf-jkl Feb 25, 2026
91589ad
add cast_options.safe docs
sdf-jkl Feb 25, 2026
e8e7fb1
Merge branch 'main' of https://github.com/apache/arrow-rs into shredd…
sdf-jkl Feb 26, 2026
28ec53c
support list-like arrays
sdf-jkl Feb 26, 2026
279b634
match typed value instead of donwcast attempts
sdf-jkl Mar 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions parquet-variant-compute/src/variant_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,101 @@ mod test {
let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)]));
assert_eq!(&result, &expected);
}
/// Extracts list element 0 from every row of the shredded list fixture via
/// `variant_get` (no target type requested) and checks the resulting
/// `VariantArray` row by row.
#[test]
fn test_shredded_list_field_access() {
    let input = shredded_list_variant_array();

    // Path `[0]`: select the first element of each list, keeping it as a variant.
    let path = VariantPath::from(0);
    let extracted = variant_get(&input, GetOptions::new_with_path(path)).unwrap();
    let variants: &VariantArray = extracted.as_any().downcast_ref().unwrap();

    // One output row per input row.
    assert_eq!(variants.len(), 3);

    // Expected first element of rows 0, 1 and 2 respectively.
    let first_elements = ["comedy", "horror", "comedy"];
    for (row, &expected) in first_elements.iter().enumerate() {
        assert_eq!(variants.value(row), Variant::String(expected));
    }
}
/// Test extracting list element 0 of each shredded list row with type
/// conversion to a `StringArray`.
#[test]
fn test_shredded_list_as_string() {
    let array = shredded_list_variant_array();

    // Extract the element at index 0 of each list as Utf8 (type conversion).
    let field = Field::new("typed_value", DataType::Utf8, false);
    let options = GetOptions::new_with_path(VariantPath::from(0))
        .with_as_type(Some(FieldRef::from(field)));
    let result = variant_get(&array, options).unwrap();

    // The fixture has three rows, so the output must have three entries: the
    // first element of each list. (The previous expectation listed the two
    // elements of row 0 instead, which cannot match a three-row result.)
    let expected: ArrayRef = Arc::new(StringArray::from(vec![
        Some("comedy"),
        Some("horror"),
        Some("comedy"),
    ]));
    assert_eq!(&result, &expected);
}
/// Helper function to create a shredded variant array representing lists.
///
/// The array is intended to represent:
/// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - perfectly shredded)
/// Row 1: ["horror", null] ([0] is shredded, [1] is binary null - partially shredded)
/// Row 2: ["comedy", "drama", "romance"] (perfectly shredded)
///
/// The physical layout built here is a struct with three fields:
/// - metadata: one variant metadata buffer per row (3 entries)
/// - typed_value: a flat `StringArray` holding the six non-null strings
/// - value: a single-entry `BinaryViewArray` built from `Variant::Null`
///
/// NOTE(review): the three child arrays have different lengths (3, 1 and 6),
/// and `typed_value` is a flat string array rather than a list of elements.
/// Struct children normally need equal lengths, and the shredding spec
/// presumably expects a list-typed `typed_value` here - TODO confirm and
/// rebuild the fixture accordingly.
fn shredded_list_variant_array() -> ArrayRef {
// Create the base metadata for lists

// Builds a variant list from `vector` and returns the two buffers produced by
// `VariantBuilder::finish` (presumably `(metadata, value)` - only the first is
// used below).
// Could add this as an api for VariantList, like VariantList::from()
fn build_list_metadata(vector: Vec<Variant>) -> (Vec<u8>, Vec<u8>) {
let mut builder = parquet_variant::VariantBuilder::new();
let mut list = builder.new_list();
for value in vector {
list.append_value(value);
}
list.finish();
builder.finish()
}
// Row 0 metadata; the second (value) buffer is discarded.
let (metadata1, _) =
build_list_metadata(vec![Variant::String("comedy"), Variant::String("drama")]);

// Row 1 metadata: second list element is a variant null.
let (metadata2, _) = build_list_metadata(vec![Variant::String("horror"), Variant::Null]);

// Row 2 metadata.
let (metadata3, _) = build_list_metadata(vec![
Variant::String("comedy"),
Variant::String("drama"),
Variant::String("romance"),
]);

// Create metadata array (one entry per row)
let metadata_array =
BinaryViewArray::from_iter_values(vec![metadata1, metadata2, metadata3]);

// Create the untyped value array.
// NOTE(review): only one entry is supplied, although there are three rows.
let value_array = BinaryViewArray::from(vec![Variant::Null.as_u8_slice()]);
// Maybe I should try with an actual primitive array
// Flat list of every non-null string element across all rows, in row order.
let typed_value_array = StringArray::from(vec![
Some("comedy"),
Some("drama"),
Some("horror"),
Some("comedy"),
Some("drama"),
Some("romance"),
]);
// Build the main VariantArray
let main_struct = crate::variant_array::StructArrayBuilder::new()
.with_field("metadata", Arc::new(metadata_array))
.with_field("value", Arc::new(value_array))
.with_field("typed_value", Arc::new(typed_value_array))
.build();

// Panics with "should create variant array" if the struct is rejected.
Arc::new(VariantArray::try_new(Arc::new(main_struct)).expect("should create variant array"))
}
/// Helper function to create a shredded variant array representing objects
///
/// This creates an array that represents:
Expand Down
Loading