Skip to content

Commit aedde43

Browse files
authored
fix: DictMaskEvaluation to respect validity (#3894)
Fixes https://github.com/vortex-data/vortex/actions/runs/16310529975/job/46065222914 Signed-off-by: blaginin <[email protected]>
1 parent 6271c6e commit aedde43

File tree

1 file changed

+68
-2
lines changed

1 file changed

+68
-2
lines changed

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,9 @@ impl MaskEvaluation for DictMaskEvaluation {
189189
return Ok(Mask::AllFalse(mask.len()));
190190
}
191191
if min.as_bool().value().unwrap_or(false) {
192-
// All values are true
193-
return Ok(mask);
192+
// All values are true, but we still need to respect codes validity
193+
let codes = self.codes_eval.invoke(Mask::new_true(mask.len())).await?;
194+
return Ok(mask.bitand(&codes.validity_mask()?));
194195
}
195196
}
196197

@@ -243,6 +244,7 @@ mod tests {
243244
use arcref::ArcRef;
244245
use futures::executor::block_on;
245246
use futures::stream;
247+
use rstest::rstest;
246248
use vortex_array::arrays::{StructArray, VarBinArray};
247249
use vortex_array::arrow::IntoArrowArray;
248250
use vortex_array::validity::Validity;
@@ -342,6 +344,70 @@ mod tests {
342344
assert_eq!(&actual, &expected);
343345
}
344346

347+
#[rstest]
348+
#[case::all_true_case(
349+
vec![Some(""), None, Some("")], // Dict values: [""]
350+
"", // Filter for empty string
351+
vec![true, false, true], // Expected: nulls excluded, all dict values match
352+
)]
353+
#[case::all_false_case(
354+
vec![Some("x"), None, Some("x")], // Dict values: ["x"]
355+
"", // Filter for empty string
356+
vec![false, false, false], // Expected: all false, no dict values match
357+
)]
358+
#[tokio::test]
359+
async fn shortpathes_filtering(
360+
#[case] data: Vec<Option<&str>>,
361+
#[case] filter_value: &str,
362+
#[case] expected: Vec<bool>,
363+
) {
364+
let strategy = DictStrategy::new(
365+
ArcRef::from(Arc::from(FlatLayoutStrategy::default()) as Arc<dyn LayoutStrategy>),
366+
ArcRef::from(Arc::from(FlatLayoutStrategy::default()) as Arc<dyn LayoutStrategy>),
367+
ArcRef::from(Arc::from(FlatLayoutStrategy::default()) as Arc<dyn LayoutStrategy>),
368+
DictLayoutOptions::default(),
369+
Arc::new(LocalExecutor),
370+
);
371+
372+
let array = VarBinArray::from_iter(data, DType::Utf8(Nullability::Nullable)).to_array();
373+
374+
let ctx = ArrayContext::empty();
375+
let segments = TestSegments::default();
376+
let layout: LayoutRef = block_on(
377+
strategy.write_stream(
378+
&ctx,
379+
SequenceWriter::new(Box::new(segments.clone())),
380+
SequentialStreamAdapter::new(
381+
DType::Utf8(Nullability::Nullable),
382+
stream::once(async move { Ok((SequenceId::root().downgrade(), array)) }),
383+
)
384+
.sendable(),
385+
),
386+
)
387+
.unwrap();
388+
389+
let filter = vortex_expr::eq(
390+
root(),
391+
vortex_expr::lit(vortex_scalar::Scalar::utf8(
392+
filter_value,
393+
Nullability::Nullable,
394+
)),
395+
);
396+
let mask = layout
397+
.new_reader("".into(), Arc::from(segments))
398+
.unwrap()
399+
.filter_evaluation(&(0..3), &filter)
400+
.unwrap()
401+
.invoke(Mask::new_true(3))
402+
.await
403+
.unwrap();
404+
405+
assert_eq!(
406+
mask.to_boolean_buffer().iter().collect::<Vec<_>>(),
407+
expected
408+
);
409+
}
410+
345411
#[tokio::test]
346412
async fn reading_is_null_works() {
347413
let strategy = DictStrategy::new(

0 commit comments

Comments
 (0)