Skip to content

Commit 90df26a

Browse files
committed
add a method to push struct validity into children
1 parent 1e0f608 commit 90df26a

File tree

2 files changed

+289
-0
lines changed

2 files changed

+289
-0
lines changed

vortex-array/src/arrays/struct_/array.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use vortex_error::VortexExpect;
1313
use vortex_error::VortexResult;
1414
use vortex_error::vortex_bail;
1515
use vortex_error::vortex_err;
16+
use vortex_mask::Mask;
1617

1718
use crate::Array;
1819
use crate::ArrayRef;
@@ -451,4 +452,115 @@ impl StructArray {
451452

452453
Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
453454
}
455+
456+
/// Push the struct-level validity into the children fields.
457+
///
458+
/// This method takes the top-level validity of the struct array and applies it to each child field
459+
/// using a mask operation. The resulting struct array will have the struct-level nulls propagated
460+
/// down to the individual fields.
461+
///
462+
/// # Parameters
463+
///
464+
/// * `preserve_struct_validity` - If `Some(true)`, the new struct array retains the original struct-level
465+
/// validity. If `Some(false)` or `None`, the new struct array has `Validity::AllValid` since all null
466+
/// information is now contained within the individual fields. Defaults to `false` when `None`.
467+
///
468+
/// # Returns
469+
///
470+
/// A new `StructArray` where each child field has been masked with the struct's validity.
471+
///
472+
/// # Examples
473+
///
474+
/// ```
475+
/// use vortex_array::arrays::StructArray;
476+
/// use vortex_array::validity::Validity;
477+
/// use vortex_array::IntoArray;
478+
/// use vortex_buffer::buffer;
479+
///
480+
/// // Create struct with top-level nulls
481+
/// let struct_array = StructArray::try_new(
482+
/// ["a", "b"].into(),
483+
/// vec![
484+
/// buffer![1i32, 2i32, 3i32].into_array(),
485+
/// buffer![10i32, 20i32, 30i32].into_array(),
486+
/// ],
487+
/// 3,
488+
/// Validity::from_iter([true, false, true]), // row 1 is null
489+
/// ).unwrap();
490+
///
491+
/// // Push validity into children, preserving struct validity
492+
/// let pushed = struct_array.push_validity_into_children(true).unwrap();
493+
/// // pushed.fields()[0] now has nulls at position 1
494+
/// // pushed.fields()[1] now has nulls at position 1
495+
/// // pushed.validity still shows row 1 as null
496+
///
497+
/// // Push validity into children, removing struct validity (default behavior)
498+
/// let pushed_no_struct = struct_array.push_validity_into_children_default().unwrap();
499+
/// // or explicitly:
500+
/// let pushed_no_struct = struct_array.push_validity_into_children(false).unwrap();
501+
/// // pushed_no_struct.fields()[0] now has nulls at position 1
502+
/// // pushed_no_struct.fields()[1] now has nulls at position 1
503+
/// // pushed_no_struct.validity is AllValid
504+
/// ```
505+
/// Push validity into children with default behavior (preserve_struct_validity = false).
506+
///
507+
/// This is equivalent to calling `push_validity_into_children(None)` or
508+
/// `push_validity_into_children(Some(false))`.
509+
pub fn push_validity_into_children_default(&self) -> VortexResult<Self> {
510+
self.push_validity_into_children(false)
511+
}
512+
513+
514+
pub fn push_validity_into_children(&self, preserve_struct_validity: bool) -> VortexResult<Self> {
515+
use crate::compute::mask;
516+
517+
// Get the struct-level validity mask
518+
let struct_validity_mask = self.validity_mask();
519+
520+
// If the struct has no nulls, return a clone
521+
if struct_validity_mask.all_true() {
522+
return if preserve_struct_validity {
523+
Ok(self.clone())
524+
} else {
525+
// Remove struct validity if requested
526+
Self::try_new(
527+
self.names().clone(),
528+
self.fields().clone(),
529+
self.len(),
530+
Validity::AllValid,
531+
)
532+
};
533+
}
534+
535+
// Apply the struct validity mask to each child field
536+
// We want to set nulls where the struct is null (i.e., where struct_validity_mask is false)
537+
// So we need to invert the mask: where struct is invalid, set child to invalid
538+
let null_mask = struct_validity_mask.iter_bools(|iter| {
539+
Mask::from_iter(iter.map(|valid| !valid)) // invert: valid->invalid, invalid->valid
540+
});
541+
542+
let masked_fields: Vec<ArrayRef> = self
543+
.fields()
544+
.iter()
545+
.map(|field| {
546+
// Use the mask function to apply null positions to each field
547+
mask(field.as_ref(), &null_mask)
548+
})
549+
.collect::<VortexResult<Vec<_>>>()?;
550+
551+
// Determine the new struct validity (default to false = remove struct validity)
552+
let new_struct_validity = if preserve_struct_validity {
553+
self.validity.clone()
554+
} else {
555+
Validity::AllValid
556+
};
557+
558+
// Construct the new struct array
559+
Self::try_new(
560+
self.names().clone(),
561+
masked_fields,
562+
self.len(),
563+
new_struct_validity,
564+
)
565+
}
454566
}

vortex-array/src/arrays/struct_/tests.rs

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use vortex_dtype::FieldName;
77
use vortex_dtype::FieldNames;
88
use vortex_dtype::Nullability;
99
use vortex_dtype::PType;
10+
use vortex_scalar::Scalar;
1011

1112
use crate::Array;
1213
use crate::IntoArray;
@@ -150,3 +151,179 @@ fn test_uncompressed_size_in_bytes() {
150151
assert_eq!(canonical_size, 2);
151152
assert_eq!(uncompressed_size, Some(4000));
152153
}
154+
155+
#[test]
fn test_push_validity_into_children_preserve_struct() {
    // Struct with fields a = [1, 2, 3] and b = [10, 20, 30]; row 1 is null at the struct level.
    let row_validity = [true, false, true];
    let struct_array = StructArray::try_new(
        ["a", "b"].into(),
        vec![
            buffer![1i32, 2i32, 3i32].into_array(),
            buffer![10i32, 20i32, 30i32].into_array(),
        ],
        3,
        Validity::from_iter(row_validity),
    )
    .unwrap();

    // Push validity into the children while keeping the struct-level validity.
    let pushed = struct_array.push_validity_into_children(true).unwrap();

    // The struct-level validity must be untouched.
    assert_eq!(pushed.validity_mask(), struct_array.validity_mask());

    let field_a = pushed.fields()[0].as_ref();
    let field_b = pushed.fields()[1].as_ref();

    // Each child is null exactly where the struct was null.
    for (row, &expect_valid) in row_validity.iter().enumerate() {
        assert_eq!(field_a.is_valid(row), expect_valid);
        assert_eq!(field_b.is_valid(row), expect_valid);
    }

    // Values at the rows that stayed valid are preserved.
    for (row, expected_a, expected_b) in [(0, 1i32, 10i32), (2, 3i32, 30i32)] {
        assert_eq!(field_a.scalar_at(row), expected_a.into());
        assert_eq!(field_b.scalar_at(row), expected_b.into());
    }

    // The struct rows keep their original validity, both through `is_valid` and through the
    // row scalars: rows 0 and 2 are valid structs, row 1 is a null struct.
    for (row, &expect_valid) in row_validity.iter().enumerate() {
        assert_eq!(pushed.is_valid(row), expect_valid);
    }
    for (row, &expect_valid) in row_validity.iter().enumerate() {
        assert_eq!(pushed.scalar_at(row).is_valid(), expect_valid);
    }
}
218+
219+
#[test]
fn test_push_validity_into_children_remove_struct() {
    use vortex_dtype::{DType, Nullability, PType};

    // Struct with fields a = [1, 2, 3] and b = [10, 20, 30]; row 1 is null at the struct level.
    let row_validity = [true, false, true];
    let struct_array = StructArray::try_new(
        ["a", "b"].into(),
        vec![
            buffer![1i32, 2i32, 3i32].into_array(),
            buffer![10i32, 20i32, 30i32].into_array(),
        ],
        3,
        Validity::from_iter(row_validity),
    )
    .unwrap();

    // The default variant pushes validity down and drops it from the struct
    // (preserve_struct_validity = false).
    let pushed = struct_array.push_validity_into_children_default().unwrap();

    // The struct itself no longer carries any nulls.
    assert!(pushed.validity_mask().all_true());

    let field_a = pushed.fields()[0].as_ref();
    let field_b = pushed.fields()[1].as_ref();

    // The children now hold the nulls that used to live on the struct.
    for (row, &expect_valid) in row_validity.iter().enumerate() {
        assert_eq!(field_a.is_valid(row), expect_valid);
        assert_eq!(field_b.is_valid(row), expect_valid);
    }

    // Values at the rows that stayed valid are preserved.
    for (row, expected_a, expected_b) in [(0, 1i32, 10i32), (2, 3i32, 30i32)] {
        assert_eq!(field_a.scalar_at(row), expected_a.into());
        assert_eq!(field_b.scalar_at(row), expected_b.into());
    }

    // Row 1 of each child compares equal to a nullable i32 null scalar...
    let null_i32_scalar = Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable));
    assert_eq!(field_a.scalar_at(1), null_i32_scalar);
    assert_eq!(field_b.scalar_at(1), null_i32_scalar);

    // ...and reports itself as invalid.
    assert!(!field_a.scalar_at(1).is_valid());
    assert!(!field_b.scalar_at(1).is_valid());

    // Every struct row is valid now, including row 1, whose fields are null instead.
    for row in 0..3 {
        assert!(pushed.is_valid(row));
    }
    for row in 0..3 {
        assert!(pushed.scalar_at(row).is_valid());
    }
}
289+
290+
#[test]
fn test_push_validity_into_children_no_nulls() {
    // Struct without any nulls.
    let struct_array = StructArray::try_new(
        ["a", "b"].into(),
        vec![
            buffer![1i32, 2i32, 3i32].into_array(),
            buffer![10i32, 20i32, 30i32].into_array(),
        ],
        3,
        Validity::AllValid,
    )
    .unwrap();

    // With preserve = true this should be a no-op.
    let pushed_preserve = struct_array.push_validity_into_children(true).unwrap();
    assert_eq!(pushed_preserve.validity_mask(), struct_array.validity_mask());

    // With preserve = false the struct validity is replaced by AllValid.
    let pushed_remove = struct_array.push_validity_into_children(false).unwrap();
    assert!(pushed_remove.validity_mask().all_true());

    // With no struct-level nulls to push down, the fields must be unchanged in both variants.
    for pushed in [&pushed_preserve, &pushed_remove] {
        assert_eq!(pushed.fields().len(), struct_array.fields().len());

        let field_pairs = pushed.fields().iter().zip(struct_array.fields().iter());
        for (pushed_field, original_field) in field_pairs {
            for row in 0..3 {
                assert_eq!(pushed_field.scalar_at(row), original_field.scalar_at(row));
            }
        }
    }
}

0 commit comments

Comments
 (0)