Skip to content

Commit 5d85587

Browse files
authored
Lazy take for operators (#5692)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 54b7ee0 commit 5d85587

File tree

4 files changed

+47
-46
lines changed

4 files changed

+47
-46
lines changed

vortex-array/src/array/mod.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use crate::DynArrayHash;
3131
use crate::arrays::BoolVTable;
3232
use crate::arrays::ConstantVTable;
3333
use crate::arrays::DecimalVTable;
34+
use crate::arrays::DictArray;
3435
use crate::arrays::ExtensionVTable;
3536
use crate::arrays::FilterArray;
3637
use crate::arrays::FixedSizeListVTable;
@@ -96,8 +97,11 @@ pub trait Array:
9697
/// Performs a constant-time slice of the array.
9798
fn slice(&self, range: Range<usize>) -> ArrayRef;
9899

99-
/// Performs a constant-time filter of the array.
100-
fn filter(&self, mask: &Mask) -> VortexResult<ArrayRef>;
100+
/// Wraps the array in a [`FilterArray`] such that it is logically filtered by the given mask.
101+
fn filter(&self, mask: Mask) -> VortexResult<ArrayRef>;
102+
103+
/// Wraps the array in a [`DictArray`] such that it is logically taken by the given indices.
104+
fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef>;
101105

102106
/// Fetch the scalar at the given index.
103107
///
@@ -232,10 +236,14 @@ impl Array for Arc<dyn Array> {
232236
self.as_ref().slice(range)
233237
}
234238

235-
fn filter(&self, mask: &Mask) -> VortexResult<ArrayRef> {
239+
fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
236240
self.as_ref().filter(mask)
237241
}
238242

243+
fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
244+
self.as_ref().take(indices)
245+
}
246+
239247
#[inline]
240248
fn scalar_at(&self, index: usize) -> Scalar {
241249
self.as_ref().scalar_at(index)
@@ -505,10 +513,13 @@ impl<V: VTable> Array for ArrayAdapter<V> {
505513
sliced
506514
}
507515

508-
fn filter(&self, mask: &Mask) -> VortexResult<ArrayRef> {
516+
fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
509517
vortex_ensure!(self.len() == mask.len(), "Filter mask length mismatch");
510-
Ok(V::filter(&self.0, mask)?
511-
.unwrap_or_else(|| FilterArray::new(self.to_array(), mask.clone()).into_array()))
518+
Ok(FilterArray::new(self.to_array(), mask).into_array())
519+
}
520+
521+
fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
522+
Ok(DictArray::try_new(indices, self.to_array())?.into_array())
512523
}
513524

514525
fn scalar_at(&self, index: usize) -> Scalar {

vortex-array/src/vtable/mod.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,8 @@ pub use visitor::*;
2626
use vortex_buffer::BufferHandle;
2727
use vortex_dtype::DType;
2828
use vortex_error::VortexResult;
29-
use vortex_mask::Mask;
3029

3130
use crate::Array;
32-
use crate::ArrayRef;
3331
use crate::IntoArray;
3432
use crate::VectorExecutor;
3533
use crate::kernel::BindCtx;
@@ -152,21 +150,6 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug {
152150
canonical.into_array().execute_vector(&session)
153151
}))
154152
}
155-
156-
/// Return an array filtered using the given mask.
157-
///
158-
/// This should be implemented if the array is able to push down filtering operations to its
159-
/// children. If not, return `Ok(None)` and the caller will handle filtering.
160-
///
161-
/// NOTE: in the future, filter push-down will happen over the physical execution plan in
162-
/// order to avoid cases where pushing down filters actually becomes more expensive. This
163-
/// happens because filtering is very slow, and performing it in every leaf of a tree
164-
/// can be more expensive than filtering once at the root.
165-
fn filter(array: &Self::Array, mask: &Mask) -> VortexResult<Option<ArrayRef>> {
166-
_ = array;
167-
_ = mask;
168-
Ok(None)
169-
}
170153
}
171154

172155
/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use vortex_array::compute::min_max;
2121
use vortex_array::compute::take;
2222
use vortex_array::expr::Expression;
2323
use vortex_array::expr::root;
24+
use vortex_array::mask::MaskExecutor;
2425
use vortex_array::session::ArraySessionExt;
2526
use vortex_dtype::DType;
2627
use vortex_dtype::FieldMask;
@@ -183,35 +184,41 @@ impl LayoutReader for DictReader {
183184
MaskFuture::new_true(mask.len()),
184185
)?;
185186

187+
let session = self.session.clone();
188+
186189
Ok(MaskFuture::new(mask.len(), async move {
187190
// Join on the I/O futures first, before the mask.
188191
let (codes, values) = try_join!(codes_eval, values_eval.map_err(VortexError::from))?;
189192
let mask = mask.await?;
190193

191-
// Short-circuit when the values are all true/false.
192-
if values.all_valid()
193-
&& let Some(MinMaxResult { min, max }) = min_max(&values)?
194-
{
195-
#[expect(clippy::bool_comparison, reason = "easy to follow")]
196-
if max.as_bool().value().vortex_expect("non null") == false {
197-
// All values are false
198-
return Ok(Mask::AllFalse(mask.len()));
199-
}
200-
#[expect(clippy::bool_comparison, reason = "easy to follow")]
201-
if min.as_bool().value().vortex_expect("not null") == true {
202-
// All values are true, but we still need to respect codes validity
203-
return Ok(mask.bitand(&codes.validity_mask()));
194+
let dict_mask = if *USE_VORTEX_OPERATORS {
195+
values.take(codes)?.execute_mask_optimized(&session)?
196+
} else {
197+
// Short-circuit when the values are all true/false.
198+
if values.all_valid()
199+
&& let Some(MinMaxResult { min, max }) = min_max(&values)?
200+
{
201+
#[expect(clippy::bool_comparison, reason = "easy to follow")]
202+
if max.as_bool().value().vortex_expect("non null") == false {
203+
// All values are false
204+
return Ok(Mask::AllFalse(mask.len()));
205+
}
206+
#[expect(clippy::bool_comparison, reason = "easy to follow")]
207+
if min.as_bool().value().vortex_expect("not null") == true {
208+
// All values are true, but we still need to respect codes validity
209+
return Ok(mask.bitand(&codes.validity_mask()));
210+
}
204211
}
205-
}
206212

207-
// Creating a mask from the dict array would canonicalize it,
208-
// it should be fine for now as long as values is already canonical,
209-
// so different row ranges do not canonicalize to the same array
210-
// multiple times.
211-
// TODO(joe): fixme casting null to false is *VERY* unsound, if the expression in the filter
212-
// can inspect nulls (e.g. `is_null`).
213-
// See `FlatEvaluation` for more details.
214-
let dict_mask = take(&values, &codes)?.try_to_mask_fill_null_false()?;
213+
// Creating a mask from the dict array would canonicalize it,
214+
// it should be fine for now as long as values is already canonical,
215+
// so different row ranges do not canonicalize to the same array
216+
// multiple times.
217+
// TODO(joe): fixme casting null to false is *VERY* unsound, if the expression in the filter
218+
// can inspect nulls (e.g. `is_null`).
219+
// See `FlatEvaluation` for more details.
220+
take(&values, &codes)?.try_to_mask_fill_null_false()?
221+
};
215222

216223
Ok(mask.bitand(&dict_mask))
217224
}))

vortex-layout/src/layouts/flat/reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ impl LayoutReader for FlatReader {
232232

233233
// Filter the array based on the row mask.
234234
if !mask.all_true() {
235-
array = array.filter(&mask)?;
235+
array = array.filter(mask)?;
236236
}
237237

238238
tracing::debug!("Project Array:\n{}", array.display_tree());

0 commit comments

Comments
 (0)