Skip to content

Commit efcb565

Browse files
committed
feat: Add push_validity_into_children methods to StructArray
Add methods to push struct-level validity into child fields: - push_validity_into_children(preserve_struct_validity: bool) - push_validity_into_children_default() - convenience method with preserve=false The functionality propagates null information from struct level down to individual fields, with options to preserve or remove the struct-level validity. Includes comprehensive tests covering all scenarios: - preserve_struct_validity = true - preserve_struct_validity = false (default) - no nulls edge case Signed-off-by: amorynan <[email protected]>
1 parent 1e0f608 commit efcb565

File tree

2 files changed

+287
-0
lines changed

2 files changed

+287
-0
lines changed

vortex-array/src/arrays/struct_/array.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use vortex_error::VortexExpect;
1313
use vortex_error::VortexResult;
1414
use vortex_error::vortex_bail;
1515
use vortex_error::vortex_err;
16+
use vortex_mask::Mask;
1617

1718
use crate::Array;
1819
use crate::ArrayRef;
@@ -451,4 +452,112 @@ impl StructArray {
451452

452453
Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
453454
}
455+
456+
/// Push the struct-level validity into the children fields.
457+
///
458+
/// This method takes the top-level validity of the struct array and applies it to each child field
459+
/// using a mask operation. The resulting struct array will have the struct-level nulls propagated
460+
/// down to the individual fields.
461+
///
462+
/// # Parameters
463+
///
464+
/// * `preserve_struct_validity` - If true, the new struct array retains the original struct-level
465+
/// validity. If false, the new struct array has `Validity::AllValid` since all null information
466+
/// is now contained within the individual fields.
467+
///
468+
/// # Returns
469+
///
470+
/// A new `StructArray` where each child field has been masked with the struct's validity.
471+
///
472+
/// # Examples
473+
///
474+
/// ```
475+
/// use vortex_array::arrays::StructArray;
476+
/// use vortex_array::validity::Validity;
477+
/// use vortex_array::IntoArray;
478+
/// use vortex_buffer::buffer;
479+
///
480+
/// // Create struct with top-level nulls
481+
/// let struct_array = StructArray::try_new(
482+
/// ["a", "b"].into(),
483+
/// vec![
484+
/// buffer![1i32, 2i32, 3i32].into_array(),
485+
/// buffer![10i32, 20i32, 30i32].into_array(),
486+
/// ],
487+
/// 3,
488+
/// Validity::from_iter([true, false, true]), // row 1 is null
489+
/// ).unwrap();
490+
///
491+
/// // Push validity into children, preserving struct validity
492+
/// let pushed = struct_array.push_validity_into_children(true).unwrap();
493+
/// // pushed.fields()[0] now has nulls at position 1
494+
/// // pushed.fields()[1] now has nulls at position 1
495+
/// // pushed.validity still shows row 1 as null
496+
///
497+
/// // Push validity into children, removing struct validity
498+
/// let pushed_no_struct = struct_array.push_validity_into_children(false).unwrap();
499+
/// // pushed_no_struct.fields()[0] now has nulls at position 1
500+
/// // pushed_no_struct.fields()[1] now has nulls at position 1
501+
/// // pushed_no_struct.validity is AllValid
502+
/// ```
503+
/// Push validity into children with default behavior (preserve_struct_validity = false).
504+
///
505+
/// This is equivalent to calling `push_validity_into_children(false)`.
506+
pub fn push_validity_into_children_default(&self) -> VortexResult<Self> {
507+
self.push_validity_into_children(false)
508+
}
509+
510+
511+
pub fn push_validity_into_children(&self, preserve_struct_validity: bool) -> VortexResult<Self> {
512+
use crate::compute::mask;
513+
514+
// Get the struct-level validity mask
515+
let struct_validity_mask = self.validity_mask();
516+
517+
// If the struct has no nulls, return a clone
518+
if struct_validity_mask.all_true() {
519+
return if preserve_struct_validity {
520+
Ok(self.clone())
521+
} else {
522+
// Remove struct validity if requested
523+
Self::try_new(
524+
self.names().clone(),
525+
self.fields().clone(),
526+
self.len(),
527+
Validity::AllValid,
528+
)
529+
};
530+
}
531+
532+
// Apply the struct validity mask to each child field
533+
// We want to set nulls where the struct is null (i.e., where struct_validity_mask is false)
534+
// So we need to invert the mask: where struct is invalid, set child to invalid
535+
let null_mask = struct_validity_mask.iter_bools(|iter| {
536+
Mask::from_iter(iter.map(|valid| !valid)) // invert: valid->invalid, invalid->valid
537+
});
538+
539+
let masked_fields: Vec<ArrayRef> = self
540+
.fields()
541+
.iter()
542+
.map(|field| {
543+
// Use the mask function to apply null positions to each field
544+
mask(field.as_ref(), &null_mask)
545+
})
546+
.collect::<VortexResult<Vec<_>>>()?;
547+
548+
// Determine the new struct validity (default to false = remove struct validity)
549+
let new_struct_validity = if preserve_struct_validity {
550+
self.validity.clone()
551+
} else {
552+
Validity::AllValid
553+
};
554+
555+
// Construct the new struct array
556+
Self::try_new(
557+
self.names().clone(),
558+
masked_fields,
559+
self.len(),
560+
new_struct_validity,
561+
)
562+
}
454563
}

vortex-array/src/arrays/struct_/tests.rs

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use vortex_dtype::FieldName;
77
use vortex_dtype::FieldNames;
88
use vortex_dtype::Nullability;
99
use vortex_dtype::PType;
10+
use vortex_scalar::Scalar;
1011

1112
use crate::Array;
1213
use crate::IntoArray;
@@ -150,3 +151,180 @@ fn test_uncompressed_size_in_bytes() {
150151
assert_eq!(canonical_size, 2);
151152
assert_eq!(uncompressed_size, Some(4000));
152153
}
154+
155+
#[test]
fn test_push_validity_into_children_preserve_struct() {
    // Struct with columns a = [1, 2, 3] and b = [10, 20, 30]; row 1 is null at the struct level.
    let original = StructArray::try_new(
        ["a", "b"].into(),
        vec![
            buffer![1i32, 2i32, 3i32].into_array(),
            buffer![10i32, 20i32, 30i32].into_array(),
        ],
        3,
        Validity::from_iter([true, false, true]),
    )
    .unwrap();

    // Propagate the struct nulls into the children while keeping the struct validity.
    let pushed = original.push_validity_into_children(true).unwrap();

    // The struct-level validity must be exactly what it was before.
    assert_eq!(pushed.validity_mask(), original.validity_mask());

    let col_a = pushed.fields()[0].as_ref();
    let col_b = pushed.fields()[1].as_ref();

    // Both children are null exactly where the struct was null (row 1).
    for (row, &expected) in [true, false, true].iter().enumerate() {
        assert_eq!(col_a.is_valid(row), expected);
        assert_eq!(col_b.is_valid(row), expected);
    }

    // Values at the still-valid rows are untouched.
    assert_eq!(col_a.scalar_at(0), Scalar::from(1i32));
    assert_eq!(col_a.scalar_at(2), Scalar::from(3i32));
    assert_eq!(col_b.scalar_at(0), Scalar::from(10i32));
    assert_eq!(col_b.scalar_at(2), Scalar::from(30i32));

    // Struct rows: 0 and 2 stay valid, row 1 stays null (validity preserved), both through
    // `is_valid` and through the struct scalar itself.
    for (row, &expected) in [true, false, true].iter().enumerate() {
        assert_eq!(pushed.is_valid(row), expected);
    }
    for (row, &expected) in [true, false, true].iter().enumerate() {
        assert_eq!(pushed.scalar_at(row).is_valid(), expected);
    }
}
218+
219+
#[test]
fn test_push_validity_into_children_remove_struct() {
    // Struct with columns a = [1, 2, 3] and b = [10, 20, 30]; row 1 is null at the struct level.
    let original = StructArray::try_new(
        ["a", "b"].into(),
        vec![
            buffer![1i32, 2i32, 3i32].into_array(),
            buffer![10i32, 20i32, 30i32].into_array(),
        ],
        3,
        Validity::from_iter([true, false, true]),
    )
    .unwrap();

    // The convenience method uses preserve_struct_validity = false, so the struct-level
    // validity is dropped after being pushed into the children.
    let pushed = original.push_validity_into_children_default().unwrap();

    // The struct itself no longer carries any nulls.
    assert!(pushed.validity_mask().all_true());

    let col_a = pushed.fields()[0].as_ref();
    let col_b = pushed.fields()[1].as_ref();

    // Both children are null exactly where the struct used to be null (row 1).
    for (row, &expected) in [true, false, true].iter().enumerate() {
        assert_eq!(col_a.is_valid(row), expected);
        assert_eq!(col_b.is_valid(row), expected);
    }

    // Values at the still-valid rows are untouched.
    assert_eq!(col_a.scalar_at(0), Scalar::from(1i32));
    assert_eq!(col_a.scalar_at(2), Scalar::from(3i32));
    assert_eq!(col_b.scalar_at(0), Scalar::from(10i32));
    assert_eq!(col_b.scalar_at(2), Scalar::from(30i32));

    // Row 1 of each child compares equal to a null scalar of nullable i32 ...
    let null_i32 = Scalar::null(vortex_dtype::DType::Primitive(
        PType::I32,
        Nullability::Nullable,
    ));
    assert_eq!(col_a.scalar_at(1), null_i32);
    assert_eq!(col_b.scalar_at(1), null_i32);

    // ... and reports itself as invalid.
    assert!(!col_a.scalar_at(1).is_valid());
    assert!(!col_b.scalar_at(1).is_valid());

    // Every struct row is now valid; row 1 is a valid struct whose fields are null.
    for row in 0..3 {
        assert!(pushed.is_valid(row));
    }
    for row in 0..3 {
        assert!(pushed.scalar_at(row).is_valid());
    }
}
290+
291+
#[test]
fn test_push_validity_into_children_no_nulls() {
    // Struct without any nulls: there is nothing to push into the children.
    let struct_array = StructArray::try_new(
        ["a", "b"].into(),
        vec![
            buffer![1i32, 2i32, 3i32].into_array(),
            buffer![10i32, 20i32, 30i32].into_array(),
        ],
        3,
        Validity::AllValid,
    )
    .unwrap();

    // With preserve = true the call is a no-op on the struct validity.
    let pushed_preserve = struct_array.push_validity_into_children(true).unwrap();
    assert_eq!(pushed_preserve.validity_mask(), struct_array.validity_mask());

    // With preserve = false the struct validity is (still) all valid.
    let pushed_remove = struct_array.push_validity_into_children(false).unwrap();
    assert!(pushed_remove.validity_mask().all_true());

    // Fields must remain unchanged in BOTH results, for every field and every row.
    for pushed in [&pushed_preserve, &pushed_remove] {
        assert_eq!(pushed.fields().len(), struct_array.fields().len());
        for (pushed_field, original_field) in
            pushed.fields().iter().zip(struct_array.fields().iter())
        {
            for row in 0..struct_array.len() {
                assert_eq!(pushed_field.scalar_at(row), original_field.scalar_at(row));
            }
        }
    }
}

0 commit comments

Comments
 (0)