Skip to content

Commit ac20942

Browse files
joseph-isaacs and claude committed
feat[compute]: add Compare impl for all datum types
Adds Compare trait implementations for all remaining datum types: - NullDatum: null comparisons always return null (validity=false) - ListViewDatum: lexicographic comparison of list elements - FixedSizeListDatum: lexicographic comparison of fixed-size list elements - StructDatum: lexicographic comparison by field order Updates the Datum Compare impl to handle all TypedDatum variants. Note: ListViewVector::scalar_at has a bug where slice doesn't slice the validity mask. Tests for list comparison are omitted until that is fixed. Signed-off-by: Joe Isaacs <[email protected]> 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent b09d031 commit ac20942

File tree

6 files changed

+1017
-0
lines changed

6 files changed

+1017
-0
lines changed

vortex-compute/src/comparison/datum.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use vortex_buffer::BitBuffer;
5+
use vortex_mask::Mask;
46
use vortex_vector::BinaryViewDatum;
57
use vortex_vector::BoolDatum;
68
use vortex_vector::Datum;
79
use vortex_vector::DecimalDatum;
10+
use vortex_vector::FixedSizeListDatum;
11+
use vortex_vector::ListViewDatum;
12+
use vortex_vector::NullDatum;
813
use vortex_vector::PrimitiveDatum;
914
use vortex_vector::ScalarOps;
15+
use vortex_vector::StructDatum;
1016
use vortex_vector::TypedDatum;
1117
use vortex_vector::VectorMutOps;
1218
use vortex_vector::VectorOps;
@@ -19,33 +25,80 @@ use vortex_vector::bool::BoolScalar;
1925
use vortex_vector::bool::BoolVector;
2026
use vortex_vector::decimal::DecimalScalar;
2127
use vortex_vector::decimal::DecimalVector;
28+
use vortex_vector::fixed_size_list::FixedSizeListScalar;
29+
use vortex_vector::fixed_size_list::FixedSizeListVector;
30+
use vortex_vector::listview::ListViewScalar;
31+
use vortex_vector::listview::ListViewVector;
32+
use vortex_vector::null::NullScalar;
33+
use vortex_vector::null::NullVector;
2234
use vortex_vector::primitive::PrimitiveScalar;
2335
use vortex_vector::primitive::PrimitiveVector;
36+
use vortex_vector::struct_::StructScalar;
37+
use vortex_vector::struct_::StructVector;
2438

2539
use crate::comparison::Compare;
2640

41+
/// Creates a BoolVector with all-null validity (used for null comparisons).
42+
fn null_bool_vector(len: usize) -> BoolVector {
43+
let bits = BitBuffer::collect_bool(len, |_| false);
44+
BoolVector::new(bits, Mask::AllFalse(len))
45+
}
46+
2747
impl<Op> Compare<Op> for Datum
2848
where
49+
NullDatum: Compare<Op, Output = BoolDatum>,
2950
BoolDatum: Compare<Op, Output = BoolDatum>,
3051
PrimitiveDatum: Compare<Op, Output = BoolDatum>,
3152
DecimalDatum: Compare<Op, Output = BoolDatum>,
3253
BinaryViewDatum<StringType>: Compare<Op, Output = BoolDatum>,
3354
BinaryViewDatum<BinaryType>: Compare<Op, Output = BoolDatum>,
55+
ListViewDatum: Compare<Op, Output = BoolDatum>,
56+
FixedSizeListDatum: Compare<Op, Output = BoolDatum>,
57+
StructDatum: Compare<Op, Output = BoolDatum>,
3458
{
3559
type Output = BoolDatum;
3660

3761
fn compare(self, rhs: Self) -> Self::Output {
3862
match (self.into_typed(), rhs.into_typed()) {
63+
(TypedDatum::Null(d1), TypedDatum::Null(d2)) => d1.compare(d2),
3964
(TypedDatum::Bool(d1), TypedDatum::Bool(d2)) => d1.compare(d2),
4065
(TypedDatum::Primitive(d1), TypedDatum::Primitive(d2)) => d1.compare(d2),
4166
(TypedDatum::Decimal(d1), TypedDatum::Decimal(d2)) => d1.compare(d2),
4267
(TypedDatum::String(d1), TypedDatum::String(d2)) => d1.compare(d2),
4368
(TypedDatum::Binary(d1), TypedDatum::Binary(d2)) => d1.compare(d2),
69+
(TypedDatum::List(d1), TypedDatum::List(d2)) => d1.compare(d2),
70+
(TypedDatum::FixedSizeList(d1), TypedDatum::FixedSizeList(d2)) => d1.compare(d2),
71+
(TypedDatum::Struct(d1), TypedDatum::Struct(d2)) => d1.compare(d2),
4472
_ => unreachable!(""),
4573
}
4674
}
4775
}
4876

77+
impl<Op> Compare<Op> for NullDatum
78+
where
79+
NullVector: Compare<Op, Output = BoolVector>,
80+
NullScalar: Compare<Op, Output = BoolScalar>,
81+
{
82+
type Output = BoolDatum;
83+
84+
fn compare(self, rhs: Self) -> Self::Output {
85+
match (self, rhs) {
86+
(NullDatum::Scalar(sc1), NullDatum::Scalar(sc2)) => BoolDatum::Scalar(sc1.compare(sc2)),
87+
(NullDatum::Vector(vec), NullDatum::Scalar(_)) => {
88+
// Comparing with null always produces null
89+
BoolDatum::Vector(null_bool_vector(vec.len()))
90+
}
91+
(NullDatum::Scalar(_), NullDatum::Vector(vec)) => {
92+
// Comparing with null always produces null
93+
BoolDatum::Vector(null_bool_vector(vec.len()))
94+
}
95+
(NullDatum::Vector(vec1), NullDatum::Vector(vec2)) => {
96+
BoolDatum::Vector(vec1.compare(vec2))
97+
}
98+
}
99+
}
100+
}
101+
49102
impl<Op> Compare<Op> for BoolDatum
50103
where
51104
BoolVector: Compare<Op, Output = BoolVector>,
@@ -152,3 +205,84 @@ where
152205
}
153206
}
154207
}
208+
209+
impl<Op> Compare<Op> for ListViewDatum
210+
where
211+
ListViewScalar: Compare<Op, Output = BoolScalar>,
212+
ListViewVector: Compare<Op, Output = BoolVector>,
213+
{
214+
type Output = BoolDatum;
215+
216+
fn compare(self, rhs: Self) -> Self::Output {
217+
match (self, rhs) {
218+
(ListViewDatum::Scalar(sc1), ListViewDatum::Scalar(sc2)) => {
219+
BoolDatum::Scalar(sc1.compare(sc2))
220+
}
221+
(ListViewDatum::Vector(vec), ListViewDatum::Scalar(sc)) => {
222+
let repeated = sc.repeat(vec.len()).into_list().freeze();
223+
BoolDatum::Vector(vec.compare(repeated))
224+
}
225+
(ListViewDatum::Scalar(sc), ListViewDatum::Vector(vec)) => {
226+
let repeated = sc.repeat(vec.len()).into_list().freeze();
227+
BoolDatum::Vector(repeated.compare(vec))
228+
}
229+
(ListViewDatum::Vector(vec1), ListViewDatum::Vector(vec2)) => {
230+
BoolDatum::Vector(vec1.compare(vec2))
231+
}
232+
}
233+
}
234+
}
235+
236+
impl<Op> Compare<Op> for FixedSizeListDatum
237+
where
238+
FixedSizeListScalar: Compare<Op, Output = BoolScalar>,
239+
FixedSizeListVector: Compare<Op, Output = BoolVector>,
240+
{
241+
type Output = BoolDatum;
242+
243+
fn compare(self, rhs: Self) -> Self::Output {
244+
match (self, rhs) {
245+
(FixedSizeListDatum::Scalar(sc1), FixedSizeListDatum::Scalar(sc2)) => {
246+
BoolDatum::Scalar(sc1.compare(sc2))
247+
}
248+
(FixedSizeListDatum::Vector(vec), FixedSizeListDatum::Scalar(sc)) => {
249+
let repeated = sc.repeat(vec.len()).into_fixed_size_list().freeze();
250+
BoolDatum::Vector(vec.compare(repeated))
251+
}
252+
(FixedSizeListDatum::Scalar(sc), FixedSizeListDatum::Vector(vec)) => {
253+
let repeated = sc.repeat(vec.len()).into_fixed_size_list().freeze();
254+
BoolDatum::Vector(repeated.compare(vec))
255+
}
256+
(FixedSizeListDatum::Vector(vec1), FixedSizeListDatum::Vector(vec2)) => {
257+
BoolDatum::Vector(vec1.compare(vec2))
258+
}
259+
}
260+
}
261+
}
262+
263+
impl<Op> Compare<Op> for StructDatum
264+
where
265+
StructScalar: Compare<Op, Output = BoolScalar>,
266+
StructVector: Compare<Op, Output = BoolVector>,
267+
{
268+
type Output = BoolDatum;
269+
270+
fn compare(self, rhs: Self) -> Self::Output {
271+
match (self, rhs) {
272+
(StructDatum::Scalar(sc1), StructDatum::Scalar(sc2)) => {
273+
BoolDatum::Scalar(sc1.compare(sc2))
274+
}
275+
(StructDatum::Vector(vec), StructDatum::Scalar(sc)) => {
276+
let repeated = sc.repeat(vec.len()).into_struct().freeze();
277+
BoolDatum::Vector(vec.compare(repeated))
278+
}
279+
(StructDatum::Scalar(sc), StructDatum::Vector(vec)) => {
280+
let repeated = sc.repeat(vec.len()).into_struct().freeze();
281+
BoolDatum::Vector(repeated.compare(vec))
282+
}
283+
(StructDatum::Vector(vec1), StructDatum::Vector(vec2)) => {
284+
BoolDatum::Vector(vec1.compare(vec2))
285+
}
286+
}
287+
}
288+
}

0 commit comments

Comments
 (0)