Skip to content

Commit 09949b6

Browse files
authored
Array operator optimizer (#5113)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 7e0ad02 commit 09949b6

File tree

8 files changed

+191
-41
lines changed

8 files changed

+191
-41
lines changed

vortex-array/src/array/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ impl Array for Arc<dyn Array> {
263263
self.as_ref().statistics()
264264
}
265265

266+
// TODO(ngates): take a Vec<ArrayRef> to avoid clones
266267
fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
267268
self.as_ref().with_children(children)
268269
}

vortex-array/src/array/operator.rs

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ pub trait ArrayOperator: 'static + Send + Sync {
2424
/// Execute the array with a selection mask, producing a canonical vector.
2525
fn execute_with_selection(&self, selection: Option<&ArrayRef>) -> VortexResult<Vector>;
2626

27+
/// Attempt to optimize the array by analyzing its children, returning a replacement array if one was found.
28+
fn reduce_children(&self) -> VortexResult<Option<ArrayRef>>;
29+
30+
/// Attempt to optimize by pushing `parent` down into this array, where this array is the child of `parent` at position `child_idx`.
31+
fn reduce_parent(&self, parent: &ArrayRef, child_idx: usize) -> VortexResult<Option<ArrayRef>>;
32+
2733
/// Bind the array to a batch kernel. This is an internal function.
2834
fn bind(
2935
&self,
@@ -33,6 +39,28 @@ pub trait ArrayOperator: 'static + Send + Sync {
3339
}
3440

3541
impl ArrayOperator for Arc<dyn Array> {
42+
fn execute_with_selection(&self, selection: Option<&ArrayRef>) -> VortexResult<Vector> {
43+
self.as_ref().execute_with_selection(selection)
44+
}
45+
46+
fn reduce_children(&self) -> VortexResult<Option<ArrayRef>> {
47+
self.as_ref().reduce_children()
48+
}
49+
50+
fn reduce_parent(&self, parent: &ArrayRef, child_idx: usize) -> VortexResult<Option<ArrayRef>> {
51+
self.as_ref().reduce_parent(parent, child_idx)
52+
}
53+
54+
fn bind(
55+
&self,
56+
selection: Option<&ArrayRef>,
57+
ctx: &mut dyn BindCtx,
58+
) -> VortexResult<BatchKernelRef> {
59+
self.as_ref().bind(selection, ctx)
60+
}
61+
}
62+
63+
impl<V: VTable> ArrayOperator for ArrayAdapter<V> {
3664
fn execute_with_selection(&self, selection: Option<&ArrayRef>) -> VortexResult<Vector> {
3765
if let Some(selection) = selection.as_ref() {
3866
if !matches!(selection.dtype(), DType::Bool(_)) {
@@ -49,21 +77,15 @@ impl ArrayOperator for Arc<dyn Array> {
4977
);
5078
}
5179
}
52-
self.as_ref().execute_with_selection(selection)
80+
self.bind(selection, &mut ())?.execute()
5381
}
5482

55-
fn bind(
56-
&self,
57-
selection: Option<&ArrayRef>,
58-
ctx: &mut dyn BindCtx,
59-
) -> VortexResult<BatchKernelRef> {
60-
self.as_ref().bind(selection, ctx)
83+
fn reduce_children(&self) -> VortexResult<Option<ArrayRef>> {
84+
<V::OperatorVTable as OperatorVTable<V>>::reduce_children(&self.0)
6185
}
62-
}
6386

64-
impl<V: VTable> ArrayOperator for ArrayAdapter<V> {
65-
fn execute_with_selection(&self, selection: Option<&ArrayRef>) -> VortexResult<Vector> {
66-
self.bind(selection, &mut ())?.execute()
87+
fn reduce_parent(&self, parent: &ArrayRef, child_idx: usize) -> VortexResult<Option<ArrayRef>> {
88+
<V::OperatorVTable as OperatorVTable<V>>::reduce_parent(&self.0, parent, child_idx)
6789
}
6890

6991
fn bind(

vortex-array/src/arrays/bool/vtable/operator.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ use vortex_compute::filter::Filter;
55
use vortex_error::VortexResult;
66
use vortex_vector::BoolVector;
77

8-
use crate::ArrayRef;
9-
use crate::arrays::{BoolArray, BoolVTable};
8+
use crate::arrays::{BoolArray, BoolVTable, MaskedVTable};
109
use crate::execution::{BatchKernelRef, BindCtx, kernel};
1110
use crate::vtable::{OperatorVTable, ValidityHelper};
11+
use crate::{ArrayRef, IntoArray};
1212

1313
impl OperatorVTable<BoolVTable> for BoolVTable {
1414
fn bind(
@@ -30,4 +30,23 @@ impl OperatorVTable<BoolVTable> for BoolVTable {
3030
Ok(BoolVector::try_new(bits, validity)?.into())
3131
}))
3232
}
33+
34+
fn reduce_parent(
35+
array: &BoolArray,
36+
parent: &ArrayRef,
37+
_child_idx: usize,
38+
) -> VortexResult<Option<ArrayRef>> {
39+
// Push-down masking of validity from parent MaskedVTable.
40+
if let Some(masked) = parent.as_opt::<MaskedVTable>() {
41+
return Ok(Some(
42+
BoolArray::from_bit_buffer(
43+
array.bit_buffer().clone(),
44+
array.validity().clone().and(masked.validity().clone()),
45+
)
46+
.into_array(),
47+
));
48+
}
49+
50+
Ok(None)
51+
}
3352
}

vortex-array/src/arrays/primitive/vtable/operator.rs

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ use vortex_dtype::match_each_native_ptype;
66
use vortex_error::VortexResult;
77
use vortex_vector::PVector;
88

9-
use crate::ArrayRef;
10-
use crate::arrays::{PrimitiveArray, PrimitiveVTable};
9+
use crate::arrays::{MaskedVTable, PrimitiveArray, PrimitiveVTable};
1110
use crate::execution::{BatchKernelRef, BindCtx, kernel};
1211
use crate::vtable::{OperatorVTable, ValidityHelper};
12+
use crate::{ArrayRef, IntoArray};
1313

1414
impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
1515
fn bind(
@@ -34,4 +34,24 @@ impl OperatorVTable<PrimitiveVTable> for PrimitiveVTable {
3434
}))
3535
})
3636
}
37+
38+
fn reduce_parent(
39+
array: &PrimitiveArray,
40+
parent: &ArrayRef,
41+
_child_idx: usize,
42+
) -> VortexResult<Option<ArrayRef>> {
43+
// Push-down masking of validity from parent MaskedVTable.
44+
if let Some(masked) = parent.as_opt::<MaskedVTable>() {
45+
return Ok(Some(
46+
PrimitiveArray::from_byte_buffer(
47+
array.byte_buffer().clone(),
48+
array.ptype(),
49+
array.validity().clone().and(masked.validity().clone()),
50+
)
51+
.into_array(),
52+
));
53+
}
54+
55+
Ok(None)
56+
}
3757
}

vortex-array/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pub mod iter;
3737
mod mask_future;
3838
mod metadata;
3939
pub mod operator;
40+
pub mod optimizer;
4041
mod partial_ord;
4142
pub mod patches;
4243
pub mod pipeline;

vortex-array/src/optimizer.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use vortex_error::VortexResult;
7+
8+
use crate::vtable::VTable;
9+
use crate::{Array, ArrayRef};
10+
11+
impl dyn Array + '_ {
12+
/// Optimize this array by applying optimization rules recursively to its children in a single
13+
/// bottom-up pass.
14+
pub fn optimize(&self) -> VortexResult<ArrayRef> {
15+
let slf = self.to_array();
16+
let children = self.children();
17+
18+
let mut new_children = Vec::with_capacity(children.len());
19+
let mut children_modified = false;
20+
for (idx, child) in children.iter().enumerate() {
21+
let child = child.optimize()?;
22+
23+
// Check if the child can reduce us (its parent), and if so bail early.
24+
if let Some(reduced) = child.reduce_parent(&slf, idx)? {
25+
return Ok(reduced);
26+
}
27+
28+
if !Arc::ptr_eq(&child, &children[idx]) {
29+
children_modified = true;
30+
}
31+
new_children.push(child);
32+
}
33+
34+
if children_modified {
35+
return self.with_children(&new_children);
36+
}
37+
38+
Ok(slf)
39+
}
40+
}
41+
42+
/// An optimizer rule that tries to reduce/replace a parent array where the implementer is a
43+
/// child array in the `CHILD_IDX` position of the parent array.
44+
pub trait ReduceParent<Parent: VTable, const CHILD_IDX: usize>: VTable {
45+
/// Try to reduce/replace the given parent array based on this child array.
46+
///
47+
/// If no reduction is possible, return None.
48+
fn reduce_parent(array: &Self::Array, parent: &Parent::Array)
49+
-> VortexResult<Option<ArrayRef>>;
50+
}
51+
52+
/// A generic optimizer rule that can be applied to an array to try to optimize it.
53+
pub trait OptimizerRule {
54+
/// Try to optimize the given array, returning a replacement if successful.
55+
///
56+
/// If no optimization is possible, return None.
57+
fn optimize(&self, array: &ArrayRef) -> VortexResult<Option<ArrayRef>>;
58+
}
59+
60+
#[cfg(test)]
61+
mod tests {
62+
use vortex_buffer::{bitbuffer, buffer};
63+
use vortex_dtype::PTypeDowncast;
64+
use vortex_vector::VectorOps;
65+
66+
use crate::arrays::{BoolArray, MaskedArray, PrimitiveArray};
67+
use crate::validity::Validity;
68+
use crate::{ArrayOperator, IntoArray};
69+
70+
#[test]
71+
fn test_masked_pushdown() {
72+
let array = PrimitiveArray::from_iter([0u32, 1, 2, 3]);
73+
assert!(!array.dtype().is_nullable());
74+
75+
let masked = MaskedArray::try_new(
76+
array.into_array(),
77+
Validity::Array(BoolArray::from(bitbuffer![0 1 0 1]).into_array()),
78+
)
79+
.unwrap();
80+
81+
let result = masked.optimize().unwrap();
82+
assert_eq!(masked.dtype(), result.dtype());
83+
assert!(result.dtype().is_nullable());
84+
85+
let vector = result.execute().unwrap().into_primitive().into_u32();
86+
assert_eq!(vector.elements(), &buffer![0, 1, 2, 3]);
87+
assert_eq!(vector.validity().to_bit_buffer(), bitbuffer![0 1 0 1]);
88+
}
89+
}

vortex-array/src/vtable/operator.rs

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,29 @@ pub trait OperatorVTable<V: VTable> {
2020
Ok(None)
2121
}
2222

23+
/// Bind the array for execution in batch mode.
24+
///
25+
/// This function should return a [`BatchKernelRef`] that can be used to execute the array in
26+
/// batch mode.
27+
///
28+
/// The selection parameter is a non-nullable boolean array that indicates which rows to
29+
/// return. i.e. the result of the kernel should be a vector whose length is equal to the
30+
/// true count of the selection array.
31+
///
32+
/// The context should be used to bind child arrays in order to support common subtree
33+
/// elimination. See also the utility functions on the `BindCtx` for efficiently extracting
34+
/// common objects such as a [`vortex_mask::Mask`].
35+
fn bind(
36+
array: &V::Array,
37+
_selection: Option<&ArrayRef>,
38+
_ctx: &mut dyn BindCtx,
39+
) -> VortexResult<BatchKernelRef> {
40+
vortex_bail!(
41+
"Bind is not yet implemented for {} arrays",
42+
array.encoding_id()
43+
)
44+
}
45+
2346
/// Attempt to optimize this array by analyzing its children.
2447
///
2548
/// For example, if all the children are constant, this function should perform constant
@@ -47,34 +70,11 @@ pub trait OperatorVTable<V: VTable> {
4770
/// Returns `None` if no optimization is possible.
4871
fn reduce_parent(
4972
_array: &V::Array,
50-
_parent: ArrayRef,
73+
_parent: &ArrayRef,
5174
_child_idx: usize,
5275
) -> VortexResult<Option<ArrayRef>> {
5376
Ok(None)
5477
}
55-
56-
/// Bind the array for execution in batch mode.
57-
///
58-
/// This function should return a [`BatchKernelRef`] that can be used to execute the array in
59-
/// batch mode.
60-
///
61-
/// The selection parameter is a non-nullable boolean array that indicates which rows to
62-
/// return. i.e. the result of the kernel should be a vector whose length is equal to the
63-
/// true count of the selection array.
64-
///
65-
/// The context should be used to bind child arrays in order to support common subtree
66-
/// elimination. See also the utility functions on the `BindCtx` for efficiently extracting
67-
/// common objects such as a [`vortex_mask::Mask`].
68-
fn bind(
69-
array: &V::Array,
70-
_selection: Option<&ArrayRef>,
71-
_ctx: &mut dyn BindCtx,
72-
) -> VortexResult<BatchKernelRef> {
73-
vortex_bail!(
74-
"Bind is not yet implemented for {} arrays",
75-
array.encoding_id()
76-
)
77-
}
7878
}
7979

8080
impl<V: VTable> OperatorVTable<V> for NotSupported {

vortex-vector/src/vector.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@ pub enum Vector {
4343
// FixedList(FixedListVector),
4444
/// Vectors of Struct elements.
4545
Struct(StructVector),
46-
// Extension
47-
// Extension(ExtensionVector),
4846
}
4947

5048
impl VectorOps for Vector {

0 commit comments

Comments
 (0)