Skip to content

Commit 635ea62

Browse files
authored
Add Not vortex expr and use it in pruning expression conversion (#1213)
1 parent 8f5b749 commit 635ea62

File tree

12 files changed

+332
-70
lines changed

12 files changed

+332
-70
lines changed

encodings/bytebool/src/array.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use vortex_array::encoding::ids;
99
use vortex_array::stats::StatsSet;
1010
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
1111
use vortex_array::variants::{ArrayVariants, BoolArrayTrait};
12-
use vortex_array::{impl_encoding, ArrayTrait, Canonical, IntoCanonical, TypedArray};
12+
use vortex_array::{
13+
impl_encoding, Array, ArrayTrait, Canonical, IntoArray, IntoCanonical, TypedArray,
14+
};
1315
use vortex_buffer::Buffer;
1416
use vortex_dtype::DType;
1517
use vortex_error::{VortexExpect as _, VortexResult};
@@ -90,6 +92,14 @@ impl ArrayVariants for ByteBoolArray {
9092
}
9193

9294
impl BoolArrayTrait for ByteBoolArray {
95+
/// Returns a new array in which every stored boolean is logically negated.
///
/// Each element of `maybe_null_slice()` is flipped, and the current
/// `validity()` is passed through unchanged to the new `ByteBoolArray`.
///
/// # Errors
///
/// Propagates any error returned by `ByteBoolArray::try_from_vec`.
fn invert(&self) -> VortexResult<Array> {
96+
ByteBoolArray::try_from_vec(
97+
// Every slot is negated, including slots that validity may mark as null.
self.maybe_null_slice().iter().map(|v| !v).collect(),
98+
self.validity(),
99+
)
100+
.map(|a| a.into_array())
101+
}
102+
93103
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
94104
todo!()
95105
}

encodings/roaring/src/boolean/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ impl ArrayVariants for RoaringBoolArray {
9797
}
9898

9999
impl BoolArrayTrait for RoaringBoolArray {
100+
/// Returns a new `RoaringBoolArray` of the same length whose bitmap has the
/// range `0..len` flipped.
///
/// # Errors
///
/// Propagates any error returned by `RoaringBoolArray::try_new`.
// NOTE(review): `self.len() as u32` silently truncates when the length does not
// fit in a u32 — confirm lengths are bounded, or switch to a checked conversion.
fn invert(&self) -> VortexResult<Array> {
101+
RoaringBoolArray::try_new(self.bitmap().flip(0..(self.len() as u32)), self.len())
102+
.map(|a| a.into_array())
103+
}
104+
100105
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
101106
todo!()
102107
}

encodings/runend-bool/src/array.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ use vortex_array::stats::{ArrayStatistics, ArrayStatisticsCompute, StatsSet};
99
use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
1010
use vortex_array::variants::{ArrayVariants, BoolArrayTrait};
1111
use vortex_array::{
12-
impl_encoding, Array, ArrayDType, ArrayTrait, Canonical, IntoArrayVariant, IntoCanonical,
12+
impl_encoding, Array, ArrayDType, ArrayTrait, Canonical, IntoArray, IntoArrayVariant,
13+
IntoCanonical,
1314
};
1415
use vortex_dtype::{DType, PType};
1516
use vortex_error::{vortex_bail, VortexExpect as _, VortexResult};
@@ -115,6 +116,11 @@ impl RunEndBoolArray {
115116
}
116117

117118
impl BoolArrayTrait for RunEndBoolArray {
119+
/// Returns a new `RunEndBoolArray` built from the same `ends()` and
/// `validity()` but with the `start()` flag negated.
///
/// Presumably runs alternate in value beginning with `start`, so negating it
/// flips every run — verify against the encoding's definition.
///
/// # Errors
///
/// Propagates any error returned by `RunEndBoolArray::try_new`.
fn invert(&self) -> VortexResult<Array> {
120+
RunEndBoolArray::try_new(self.ends(), !self.start(), self.validity())
121+
.map(|a| a.into_array())
122+
}
123+
118124
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
119125
todo!()
120126
}

vortex-array/src/array/bool/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::encoding::ids;
1313
use crate::stats::StatsSet;
1414
use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
1515
use crate::variants::{ArrayVariants, BoolArrayTrait};
16-
use crate::{impl_encoding, ArrayTrait, Canonical, IntoCanonical, TypedArray};
16+
use crate::{impl_encoding, Array, ArrayTrait, Canonical, IntoArray, IntoCanonical, TypedArray};
1717

1818
mod accessors;
1919
mod compute;
@@ -96,6 +96,10 @@ impl ArrayVariants for BoolArray {
9696
}
9797

9898
impl BoolArrayTrait for BoolArray {
99+
/// Returns a new `BoolArray` whose boolean buffer is the bitwise NOT of this
/// array's `boolean_buffer()`, paired with the unchanged `validity()`.
///
/// # Errors
///
/// Propagates any error returned by `BoolArray::try_new`.
fn invert(&self) -> VortexResult<Array> {
100+
BoolArray::try_new(!&self.boolean_buffer(), self.validity()).map(|a| a.into_array())
101+
}
102+
99103
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
100104
Box::new(BitIndexIterator::new(self.buffer(), 0, self.len()))
101105
}

vortex-array/src/array/chunked/variants.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,20 @@ impl ArrayVariants for ChunkedArray {
4747
impl NullArrayTrait for ChunkedArray {}
4848

4949
impl BoolArrayTrait for ChunkedArray {
50+
/// Inverts each chunk independently and reassembles the results into a new
/// `ChunkedArray` with the same dtype.
///
/// # Errors
///
/// Returns an error ("Child was not a bool array") if any chunk does not
/// expose a bool-array view, and propagates errors from the per-chunk
/// `invert` calls or from `ChunkedArray::try_new`.
fn invert(&self) -> VortexResult<Array> {
51+
let chunks = self
52+
.chunks()
53+
.map(|c| {
54+
c.with_dyn(|a| {
55+
a.as_bool_array()
56+
.ok_or_else(|| vortex_err!("Child was not a bool array"))
57+
.and_then(|b| b.invert())
58+
})
59+
})
60+
// Collecting into a `VortexResult` stops at the first failing chunk.
.collect::<VortexResult<Vec<_>>>()?;
61+
ChunkedArray::try_new(chunks, self.dtype().clone()).map(|a| a.into_array())
62+
}
63+
5064
fn maybe_null_indices_iter(&self) -> Box<dyn Iterator<Item = usize>> {
5165
todo!()
5266
}

vortex-array/src/array/sparse/variants.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,21 @@ impl ArrayVariants for SparseArray {
4848
impl NullArrayTrait for SparseArray {}
4949

5050
impl BoolArrayTrait for SparseArray {
51+
/// Returns a new `SparseArray` with the same indices and length, whose stored
/// values are inverted and whose fill value is negated.
///
/// A fill value for which `as_bool()` yields `None` is left as `None`; only a
/// `Some` fill is negated.
///
/// # Errors
///
/// Returns an error if the fill value cannot be read as a bool, if the values
/// child does not expose a bool-array view ("Not a bool array"), or if
/// inverting the values or `SparseArray::try_new` fails.
fn invert(&self) -> VortexResult<Array> {
52+
let inverted_fill = self.fill_value().as_bool()?.map(|v| !v);
53+
SparseArray::try_new(
54+
self.indices(),
55+
// Invert the explicitly stored (patched) values via their own `invert`.
self.values().with_dyn(|a| {
56+
a.as_bool_array()
57+
.ok_or_else(|| vortex_err!("Not a bool array"))
58+
.and_then(|b| b.invert())
59+
})?,
60+
self.len(),
61+
inverted_fill.into(),
62+
)
63+
.map(|a| a.into_array())
64+
}
65+
5166
fn maybe_null_indices_iter(&self) -> Box<dyn Iterator<Item = usize>> {
5267
Box::new(self.resolved_indices().into_iter())
5368
}
@@ -120,3 +135,33 @@ impl ExtensionArrayTrait for SparseArray {
120135
.into_array()
121136
}
122137
}
138+
139+
// Unit tests for the `BoolArrayTrait` implementation on `SparseArray`.
#[cfg(test)]
140+
mod tests {
141+
use crate::array::{BoolArray, PrimitiveArray, SparseArray};
142+
use crate::{IntoArray, IntoArrayVariant};
143+
144+
// Inverting a sparse bool array with a non-null fill must negate both the
// stored value and the fill value.
#[test]
145+
fn invert_bools_non_null_fill() {
146+
// Length-2 array: index 0 stores `false`, every other slot takes the fill `true`.
let sparse_bools = SparseArray::try_new(
147+
PrimitiveArray::from(vec![0u64]).into_array(),
148+
BoolArray::from(vec![false]).into_array(),
149+
2,
150+
true.into(),
151+
)
152+
.unwrap()
153+
.into_array();
154+
let inverted = sparse_bools
155+
.with_dyn(|a| a.as_bool_array_unchecked().invert())
156+
.unwrap();
157+
// Expected: stored `false` becomes `true`, fill `true` becomes `false`.
assert_eq!(
158+
inverted
159+
.into_bool()
160+
.unwrap()
161+
.boolean_buffer()
162+
.iter()
163+
.collect::<Vec<_>>(),
164+
vec![true, false]
165+
);
166+
}
167+
}

vortex-array/src/variants.rs

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,14 @@
33
//! When callers only want to make assumptions about the DType, and not about any specific
44
//! encoding, they can use these traits to write encoding-agnostic code.
55
6-
use std::ops::Not;
76
use std::sync::Arc;
87

98
use vortex_dtype::field::Field;
109
use vortex_dtype::{DType, ExtDType, FieldNames, PType};
1110
use vortex_error::{vortex_panic, VortexExpect as _, VortexResult};
1211

13-
use crate::array::BoolArray;
1412
use crate::iter::{AccessorRef, VectorizedArrayIter};
15-
use crate::{Array, ArrayTrait, IntoArray, IntoArrayVariant};
13+
use crate::{Array, ArrayTrait};
1614

1715
pub trait ArrayVariants {
1816
fn as_null_array(&self) -> Option<&dyn NullArrayTrait> {
@@ -90,15 +88,7 @@ pub trait BoolArrayTrait: ArrayTrait {
9088
/// True -> False
9189
/// False -> True
9290
/// Null -> Null
93-
fn invert(&self) -> VortexResult<Array>
94-
where
95-
Self: Clone,
96-
{
97-
let bool_array = self.clone().into_bool()?;
98-
let validity = bool_array.validity();
99-
100-
BoolArray::try_new(bool_array.boolean_buffer().not(), validity).map(|a| a.into_array())
101-
}
91+
fn invert(&self) -> VortexResult<Array>;
10292

10393
fn true_count(&self) -> usize {
10494
self.statistics()

vortex-expr/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ mod column;
99
pub mod datafusion;
1010
mod identity;
1111
mod literal;
12+
mod not;
1213
mod operators;
1314
mod select;
1415

1516
pub use binary::*;
1617
pub use column::*;
1718
pub use identity::*;
1819
pub use literal::*;
20+
pub use not::*;
1921
pub use operators::*;
2022
pub use select::*;
2123
use vortex_array::Array;

vortex-expr/src/literal.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ impl Literal {
1616
pub fn new(value: Scalar) -> Self {
1717
Self { value }
1818
}
19+
20+
/// Returns a reference to the scalar value held by this literal.
pub fn value(&self) -> &Scalar {
21+
&self.value
22+
}
1923
}
2024

2125
impl VortexExpr for Literal {

vortex-expr/src/not.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
use std::any::Any;
2+
use std::sync::Arc;
3+
4+
use vortex_array::aliases::hash_set::HashSet;
5+
use vortex_array::Array;
6+
use vortex_dtype::field::Field;
7+
use vortex_error::{vortex_err, VortexResult};
8+
9+
use crate::{unbox_any, VortexExpr};
10+
11+
/// Expression that applies boolean inversion to the result of a child expression.
#[derive(Debug)]
12+
pub struct Not {
13+
// The expression whose evaluated result is inverted.
child: Arc<dyn VortexExpr>,
14+
}
15+
16+
impl Not {
17+
/// Creates a `Not` expression wrapping `child`.
pub fn new(child: Arc<dyn VortexExpr>) -> Self {
18+
Self { child }
19+
}
20+
21+
/// Returns a reference to the wrapped child expression.
pub fn child(&self) -> &Arc<dyn VortexExpr> {
22+
&self.child
23+
}
24+
}
25+
26+
impl VortexExpr for Not {
27+
/// Exposes `self` as `&dyn Any` so callers can downcast to `Not`.
fn as_any(&self) -> &dyn Any {
28+
self
29+
}
30+
31+
/// Evaluates the child expression on `batch` and inverts its result.
///
/// # Errors
///
/// Propagates errors from evaluating the child, returns an error
/// ("Child was not a bool array") if the child's result does not expose a
/// bool-array view, and propagates errors from the array's `invert`.
fn evaluate(&self, batch: &Array) -> VortexResult<Array> {
32+
let child_result = self.child.evaluate(batch)?;
33+
child_result.with_dyn(|a| {
34+
a.as_bool_array()
35+
.ok_or_else(|| vortex_err!("Child was not a bool array"))
36+
.and_then(|b| b.invert())
37+
})
38+
}
39+
40+
/// Delegates to the child: `Not` itself references no fields.
fn collect_references<'a>(&'a self, references: &mut HashSet<&'a Field>) {
41+
self.child.collect_references(references)
42+
}
43+
}
44+
45+
// Equality against a type-erased value: equal only when `other` downcasts to
// `Not` and the two children compare equal.
impl PartialEq<dyn Any> for Not {
46+
fn eq(&self, other: &dyn Any) -> bool {
47+
// `unbox_any` is applied before the downcast; a failed downcast means "not equal".
unbox_any(other)
48+
.downcast_ref::<Self>()
49+
.map(|x| x.child.eq(&self.child))
50+
.unwrap_or(false)
51+
}
52+
}
53+
54+
// Unit tests for the `Not` expression.
#[cfg(test)]
55+
mod tests {
56+
use std::sync::Arc;
57+
58+
use vortex_array::array::BoolArray;
59+
use vortex_array::IntoArrayVariant;
60+
61+
use crate::{Identity, Not, VortexExpr};
62+
63+
// `Not` over `Identity` should return the element-wise negation of the input batch.
#[test]
64+
fn invert_booleans() {
65+
let not_expr = Not::new(Arc::new(Identity));
66+
let bools = BoolArray::from(vec![false, true, false, false, true, true]);
67+
assert_eq!(
68+
not_expr
69+
.evaluate(bools.as_ref())
70+
.unwrap()
71+
.into_bool()
72+
.unwrap()
73+
.boolean_buffer()
74+
.iter()
75+
.collect::<Vec<_>>(),
76+
vec![true, false, true, true, false, false]
77+
);
78+
}
79+
}

0 commit comments

Comments
 (0)