Skip to content

Commit 4781ca5

Browse files
authored
specialize fsst compare to empty const value (#2234)
followup to #2227, I also tried to extract some of the shared logic up to the `compute` module.
1 parent e84f4ff commit 4781ca5

File tree

6 files changed

+117
-63
lines changed

6 files changed

+117
-63
lines changed

encodings/fsst/src/compute/compare.rs

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use fsst::Symbol;
2-
use vortex_array::array::ConstantArray;
3-
use vortex_array::compute::{compare, CompareFn, Operator};
4-
use vortex_array::{Array, IntoArrayVariant};
2+
use vortex_array::array::{BoolArray, BooleanBuffer, ConstantArray};
3+
use vortex_array::compute::{compare, compare_lengths_to_empty, CompareFn, Operator};
4+
use vortex_array::variants::PrimitiveArrayTrait;
5+
use vortex_array::{Array, IntoArray, IntoArrayVariant, IntoCanonical};
56
use vortex_buffer::ByteBuffer;
6-
use vortex_dtype::DType;
7-
use vortex_error::{VortexExpect, VortexResult};
7+
use vortex_dtype::{match_each_native_ptype, DType};
8+
use vortex_error::{vortex_bail, VortexExpect, VortexResult};
89

910
use crate::{FSSTArray, FSSTEncoding};
1011

@@ -15,26 +16,61 @@ impl CompareFn<FSSTArray> for FSSTEncoding {
1516
rhs: &Array,
1617
operator: Operator,
1718
) -> VortexResult<Option<Array>> {
18-
match (rhs.as_constant(), operator) {
19-
(Some(constant), Operator::Eq | Operator::NotEq) => compare_fsst_constant(
20-
lhs,
21-
&ConstantArray::new(constant, lhs.len()),
22-
operator == Operator::Eq,
23-
)
24-
.map(Some),
19+
match rhs.as_constant() {
20+
Some(constant) => {
21+
compare_fsst_constant(lhs, &ConstantArray::new(constant, lhs.len()), operator)
22+
}
2523
// Otherwise, fall back to the default comparison behavior.
2624
_ => Ok(None),
2725
}
2826
}
2927
}
3028

31-
/// Specialized compare function implementation used when performing equals or not equals against
32-
/// a constant.
29+
/// Specialized compare function implementation used when performing against a constant
3330
fn compare_fsst_constant(
3431
left: &FSSTArray,
3532
right: &ConstantArray,
36-
equal: bool,
37-
) -> VortexResult<Array> {
33+
operator: Operator,
34+
) -> VortexResult<Option<Array>> {
35+
let rhs_scalar = right.scalar();
36+
let is_rhs_empty = match rhs_scalar.dtype() {
37+
DType::Binary(_) => rhs_scalar
38+
.as_binary()
39+
.is_empty()
40+
.vortex_expect("RHS should not be null"),
41+
DType::Utf8(_) => rhs_scalar
42+
.as_utf8()
43+
.is_empty()
44+
.vortex_expect("RHS should not be null"),
45+
_ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"),
46+
};
47+
if is_rhs_empty {
48+
let buffer = match operator {
49+
// Every possible value is gte ""
50+
Operator::Gte => BooleanBuffer::new_set(left.len()),
51+
// No value is lt ""
52+
Operator::Lt => BooleanBuffer::new_unset(left.len()),
53+
_ => {
54+
let uncompressed_lengths = left
55+
.uncompressed_lengths()
56+
.into_canonical()?
57+
.into_primitive()?;
58+
match_each_native_ptype!(uncompressed_lengths.ptype(), |$P| {
59+
compare_lengths_to_empty(uncompressed_lengths.as_slice::<$P>().iter().copied(), operator)
60+
})
61+
}
62+
};
63+
64+
return Ok(Some(
65+
BoolArray::try_new(buffer, left.validity())?.into_array(),
66+
));
67+
}
68+
69+
// The following section only supports Eq/NotEq
70+
if !matches!(operator, Operator::Eq | Operator::NotEq) {
71+
return Ok(None);
72+
}
73+
3874
let symbols = left.symbols().into_primitive()?;
3975
let symbols_u64 = symbols.as_slice::<u64>();
4076

@@ -68,11 +104,7 @@ fn compare_fsst_constant(
68104
};
69105

70106
let rhs = ConstantArray::new(encoded_scalar, left.len());
71-
compare(
72-
left.codes(),
73-
rhs,
74-
if equal { Operator::Eq } else { Operator::NotEq },
75-
)
107+
compare(left.codes(), rhs, operator).map(Some)
76108
}
77109

78110
#[cfg(test)]

vortex-array/src/array/varbin/compute/compare.rs

Lines changed: 19 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@ use arrow_array::{BinaryArray, StringArray};
22
use arrow_buffer::BooleanBuffer;
33
use arrow_ord::cmp;
44
use itertools::Itertools;
5-
use num_traits::Zero;
65
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
76
use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult};
87

98
use crate::array::{BoolArray, PrimitiveArray, VarBinArray, VarBinEncoding};
109
use crate::arrow::{from_arrow_array_with_len, Datum};
11-
use crate::compute::{CompareFn, Operator};
10+
use crate::compute::{compare_lengths_to_empty, CompareFn, Operator};
1211
use crate::variants::PrimitiveArrayTrait as _;
1312
use crate::{Array, IntoArray, IntoCanonical};
1413

@@ -25,24 +24,15 @@ impl CompareFn<VarBinArray> for VarBinEncoding {
2524
let len = lhs.len();
2625

2726
let rhs_is_empty = match rhs_const.dtype() {
28-
DType::Utf8(_) => {
29-
let v = rhs_const
30-
.as_utf8()
31-
.value()
32-
.vortex_expect("null scalar handled in top-level");
33-
v.is_empty()
34-
}
35-
DType::Binary(_) => {
36-
let v = rhs_const
37-
.as_binary()
38-
.value()
39-
.vortex_expect("null scalar handled in top-level");
40-
v.is_empty()
41-
}
42-
_ => vortex_bail!(
43-
"VarBin array RHS can only be Utf8 or Binary, given {}",
44-
rhs_const.dtype()
45-
),
27+
DType::Binary(_) => rhs_const
28+
.as_binary()
29+
.is_empty()
30+
.vortex_expect("RHS should not be null"),
31+
DType::Utf8(_) => rhs_const
32+
.as_utf8()
33+
.is_empty()
34+
.vortex_expect("RHS should not be null"),
35+
_ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"),
4636
};
4737

4838
if rhs_is_empty {
@@ -54,7 +44,7 @@ impl CompareFn<VarBinArray> for VarBinEncoding {
5444
_ => {
5545
let lhs_offsets = lhs.offsets().into_canonical()?.into_primitive()?;
5646
match_each_native_ptype!(lhs_offsets.ptype(), |$P| {
57-
compare_to_empty::<$P>(lhs_offsets, operator)
47+
compare_offsets_to_empty::<$P>(lhs_offsets, operator)
5848
})
5949
}
6050
};
@@ -101,25 +91,14 @@ impl CompareFn<VarBinArray> for VarBinEncoding {
10191
}
10292
}
10393

104-
/// All comparison can be expressed in terms of equality. "" is the absolute min of possible value.
105-
const fn cmp_to_empty<T: PartialEq + PartialOrd + Zero>(op: Operator) -> fn(T) -> bool {
106-
match op {
107-
Operator::Eq | Operator::Lte => |v| v == T::zero(),
108-
Operator::NotEq | Operator::Gt => |v| v != T::zero(),
109-
Operator::Gte => |_| true,
110-
Operator::Lt => |_| false,
111-
}
112-
}
113-
114-
fn compare_to_empty<T>(array: PrimitiveArray, op: Operator) -> BooleanBuffer
115-
where
116-
T: NativePType,
117-
{
118-
let cmp_fn = cmp_to_empty::<T>(op);
119-
let slice = array.as_slice::<T>();
120-
slice
94+
fn compare_offsets_to_empty<P: NativePType>(
95+
offsets: PrimitiveArray,
96+
operator: Operator,
97+
) -> BooleanBuffer {
98+
let lengths_iter = offsets
99+
.as_slice::<P>()
121100
.iter()
122101
.tuple_windows()
123-
.map(|(&s, &e)| cmp_fn(e - s))
124-
.collect::<BooleanBuffer>()
102+
.map(|(&s, &e)| e - s);
103+
compare_lengths_to_empty(lengths_iter, operator)
125104
}

vortex-array/src/compute/compare.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use core::fmt;
22
use std::fmt::{Display, Formatter};
33

4+
use arrow_buffer::BooleanBuffer;
45
use arrow_ord::cmp;
5-
use vortex_dtype::{DType, Nullability};
6+
use vortex_dtype::{DType, NativePType, Nullability};
67
use vortex_error::{vortex_bail, VortexError, VortexResult};
78
use vortex_scalar::Scalar;
89

@@ -166,6 +167,24 @@ pub fn compare(
166167
Ok(result)
167168
}
168169

170+
/// Helper function to compare empty values with arrays that have external value length information
171+
/// like `VarBin`.
172+
pub fn compare_lengths_to_empty<P, I>(lengths: I, op: Operator) -> BooleanBuffer
173+
where
174+
P: NativePType,
175+
I: Iterator<Item = P>,
176+
{
177+
// All comparison can be expressed in terms of equality. "" is the absolute min of possible value.
178+
let cmp_fn = match op {
179+
Operator::Eq | Operator::Lte => |v| v == P::zero(),
180+
Operator::NotEq | Operator::Gt => |v| v != P::zero(),
181+
Operator::Gte => |_| true,
182+
Operator::Lt => |_| false,
183+
};
184+
185+
lengths.map(cmp_fn).collect::<BooleanBuffer>()
186+
}
187+
169188
/// Implementation of `CompareFn` using the Arrow crate.
170189
fn arrow_compare(left: &Array, right: &Array, operator: Operator) -> VortexResult<Array> {
171190
let nullable = left.dtype().is_nullable() || right.dtype().is_nullable();
@@ -324,4 +343,18 @@ mod tests {
324343
assert_eq!(res.as_bool().value(), Some(false));
325344
assert_eq!(compare.len(), 10);
326345
}
346+
347+
#[rstest::rstest]
348+
#[case(Operator::Eq, vec![false, false, false, true])]
349+
#[case(Operator::NotEq, vec![true, true, true, false])]
350+
#[case(Operator::Gt, vec![true, true, true, false])]
351+
#[case(Operator::Gte, vec![true, true, true, true])]
352+
#[case(Operator::Lt, vec![false, false, false, false])]
353+
#[case(Operator::Lte, vec![false, false, false, true])]
354+
fn test_cmp_to_empty(#[case] op: Operator, #[case] expected: Vec<bool>) {
355+
let lengths: Vec<i32> = vec![1, 5, 7, 0];
356+
357+
let output = compare_lengths_to_empty(lengths.iter().copied(), op);
358+
assert_eq!(Vec::from_iter(output.iter()), expected);
359+
}
327360
}

vortex-array/src/compute/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pub use boolean::{
1414
and, and_kleene, binary_boolean, or, or_kleene, BinaryBooleanFn, BinaryOperator,
1515
};
1616
pub use cast::{try_cast, CastFn};
17-
pub use compare::{compare, scalar_cmp, CompareFn, Operator};
17+
pub use compare::{compare, compare_lengths_to_empty, scalar_cmp, CompareFn, Operator};
1818
pub use fill_forward::{fill_forward, FillForwardFn};
1919
pub use fill_null::{fill_null, FillNullFn};
2020
pub use filter::{filter, FilterFn};

vortex-scalar/src/binary.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ impl<'a> BinaryScalar<'a> {
4646
))),
4747
))
4848
}
49+
50+
/// Returns whether its value is non-null and empty, otherwise `None`.
51+
pub fn is_empty(&self) -> Option<bool> {
52+
self.value.as_ref().map(|v| v.is_empty())
53+
}
4954
}
5055

5156
impl Scalar {

vortex-scalar/src/utf8.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ impl<'a> Utf8Scalar<'a> {
4747
))),
4848
))
4949
}
50+
51+
/// Returns whether its value is non-null and empty, otherwise `None`.
52+
pub fn is_empty(&self) -> Option<bool> {
53+
self.value.as_ref().map(|v| v.is_empty())
54+
}
5055
}
5156

5257
impl Scalar {

0 commit comments

Comments
 (0)