Skip to content

Commit a02723a

Browse files
authored
feat: add vortex-mask crate (#2019)
I decided to stick with the vortex-blah naming scheme instead of my much better name, "inthesea".
1 parent 046bff3 commit a02723a

File tree

7 files changed

+638
-0
lines changed

7 files changed

+638
-0
lines changed

Cargo.lock

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ members = [
1717
"vortex-io",
1818
"vortex-ipc",
1919
"vortex-layout",
20+
"vortex-mask",
2021
"vortex-proto",
2122
"vortex-sampling-compressor",
2223
"vortex-scalar",

vortex-mask/Cargo.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[package]
2+
name = "vortex-mask"
3+
description = "Vortex Mask - sorted, unique, non-negative integers"
4+
version.workspace = true
5+
homepage.workspace = true
6+
repository.workspace = true
7+
authors.workspace = true
8+
license.workspace = true
9+
keywords.workspace = true
10+
include.workspace = true
11+
edition.workspace = true
12+
rust-version.workspace = true
13+
readme.workspace = true
14+
categories.workspace = true
15+
16+
[dependencies]
17+
arrow-buffer = { workspace = true }
18+
vortex-error = { workspace = true }
19+
20+
[lints]
21+
workspace = true

vortex-mask/src/bitand.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
use std::ops::BitAnd;
2+
3+
use vortex_error::vortex_panic;
4+
5+
use crate::Mask;
6+
7+
impl BitAnd for &Mask {
8+
type Output = Mask;
9+
10+
fn bitand(self, rhs: Self) -> Self::Output {
11+
if self.len() != rhs.len() {
12+
vortex_panic!("FilterMasks must have the same length");
13+
}
14+
if self.true_count() == 0 || rhs.true_count() == 0 {
15+
return Mask::new_false(self.len());
16+
}
17+
if self.true_count() == self.len() {
18+
return rhs.clone();
19+
}
20+
if rhs.true_count() == self.len() {
21+
return self.clone();
22+
}
23+
24+
if let (Some(lhs), Some(rhs)) = (self.0.buffer.get(), rhs.0.buffer.get()) {
25+
return Mask::from_buffer(lhs & rhs);
26+
}
27+
28+
if let (Some(lhs), Some(rhs)) = (self.0.indices.get(), rhs.0.indices.get()) {
29+
// TODO(ngates): this may only make sense for sparse indices.
30+
return Mask::from_intersection_indices(
31+
self.len(),
32+
lhs.iter().copied(),
33+
rhs.iter().copied(),
34+
);
35+
}
36+
37+
// TODO(ngates): we could perform a more efficient bitandion for slices.
38+
Mask::from_buffer(self.boolean_buffer() & rhs.boolean_buffer())
39+
}
40+
}

vortex-mask/src/eq.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use crate::Mask;
2+
3+
impl PartialEq for Mask {
4+
fn eq(&self, other: &Self) -> bool {
5+
if self.len() != other.len() {
6+
return false;
7+
}
8+
if self.true_count() != other.true_count() {
9+
return false;
10+
}
11+
12+
// Since the true counts are the same, a full or empty mask is equal to the other mask.
13+
if self.true_count() == 0 || self.true_count() == self.len() {
14+
return true;
15+
}
16+
17+
// Compare the buffer if both masks are non-empty.
18+
if let (Some(buffer), Some(other)) = (self.0.buffer.get(), other.0.buffer.get()) {
19+
return buffer == other;
20+
}
21+
22+
// Compare the indices if both masks are non-empty.
23+
if let (Some(indices), Some(other)) = (self.0.indices.get(), other.0.indices.get()) {
24+
return indices == other;
25+
}
26+
27+
// Compare the slices if both masks are non-empty.
28+
if let (Some(slices), Some(other)) = (self.0.slices.get(), other.0.slices.get()) {
29+
return slices == other;
30+
}
31+
32+
// Otherwise, we fall back to comparison based on sparsity.
33+
// We could go further an exhaustively check whose OnceLocks are initialized, but that's
34+
// probably not worth the effort.
35+
self.boolean_buffer() == other.boolean_buffer()
36+
}
37+
}
38+
39+
// Marker impl: declares that the equality defined by the `PartialEq` impl for `Mask` is a full
// equivalence relation, so `Mask` can be used where `Eq` is required.
impl Eq for Mask {}
40+
41+
#[cfg(test)]
mod test {
    use arrow_buffer::BooleanBuffer;

    use crate::Mask;

    /// Masks built through different constructors must compare equal when they select the
    /// same positions.
    #[test]
    fn filter_mask_eq() {
        let equal_pairs = [
            // All-true and all-false masks versus their buffer-backed equivalents.
            (
                Mask::new_true(5),
                Mask::from_buffer(BooleanBuffer::new_set(5)),
            ),
            (
                Mask::new_false(5),
                Mask::from_buffer(BooleanBuffer::new_unset(5)),
            ),
            // Indices versus slices.
            (
                Mask::from_indices(5, vec![0, 2, 3]),
                Mask::from_slices(5, vec![(0, 1), (2, 4)]),
            ),
            // Indices versus a boolean buffer.
            (
                Mask::from_indices(5, vec![0, 2, 3]),
                Mask::from_buffer(BooleanBuffer::from_iter([
                    true, false, true, true, false,
                ])),
            ),
        ];

        for (lhs, rhs) in &equal_pairs {
            assert_eq!(lhs, rhs);
        }
    }
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
use crate::Mask;
2+
3+
impl Mask {
    /// Narrows `self` by `mask`, where `mask` is addressed by rank: position `i` of `mask`
    /// decides whether the `i`-th true value of `self` is kept.
    ///
    /// The result has the same length as `self`.
    ///
    /// The case of interest is a low-selectivity `self` (held as indices) combined with a
    /// boolean-buffer `mask`, so other combinations are not optimized yet.
    ///
    /// # Panics
    ///
    /// Panics if `mask.len()` differs from `self.true_count()`.
    pub fn intersect_by_rank(&self, mask: &Mask) -> Mask {
        assert_eq!(self.true_count(), mask.len());

        let kept = mask.true_count();
        if kept == mask.len() {
            // Every true value of `self` survives.
            return self.clone();
        }
        if kept == 0 {
            // No true value of `self` survives.
            return Self::new_false(self.len());
        }

        // TODO(joe): support other fast paths, not converting self & mask into indices,
        //  however indices are better for sparse masks, so this is the common case for now.
        let positions = self.0.indices();
        let survivors = mask.indices().iter().copied().map(|rank| {
            // SAFETY: relies on the `Mask` invariants that every index reported by
            // `mask.indices()` is below `mask.len()` and that `positions` holds one entry per
            // true value of `self`. The assertion above pins `self.true_count()` to
            // `mask.len()`, so `rank` is in bounds for `positions`.
            unsafe { *positions.get_unchecked(rank) }
        });
        Self::from_indices(self.len(), survivors.collect())
    }
}
35+
36+
#[cfg(test)]
mod test {
    use arrow_buffer::BooleanBuffer;

    use crate::Mask;

    /// Builds a buffer-backed mask from a list of booleans.
    fn mask_of(bits: &[bool]) -> Mask {
        Mask::from_buffer(BooleanBuffer::from_iter(bits.iter().copied()))
    }

    /// With an all-true `self`, ranks coincide with positions, so the result mirrors `mask`.
    #[test]
    fn mask_bitand_all_as_bit_and() {
        let this = mask_of(&[true, true, true, true, true]);
        let mask = mask_of(&[false, true, false, true, true]);

        let expected = Mask::from_indices(5, vec![1, 3, 4]);
        assert_eq!(this.intersect_by_rank(&mask), expected);
    }

    /// An all-true `mask` keeps every true value of `self`.
    #[test]
    fn mask_bitand_all_true() {
        let this = mask_of(&[false, false, true, true, true]);
        let mask = mask_of(&[true, true, true]);

        let expected = Mask::from_indices(5, vec![2, 3, 4]);
        assert_eq!(this.intersect_by_rank(&mask), expected);
    }

    /// A partially-true `mask` keeps only the true values of `self` at the selected ranks.
    #[test]
    fn mask_bitand_true() {
        let this = mask_of(&[true, false, false, true, true]);
        let mask = mask_of(&[true, false, true]);

        let expected = Mask::from_indices(5, vec![0, 4]);
        assert_eq!(this.intersect_by_rank(&mask), expected);
    }

    /// An all-false `mask` drops every true value of `self`.
    #[test]
    fn mask_bitand_false() {
        let this = mask_of(&[true, false, false, true, true]);
        let mask = mask_of(&[false, false, false]);

        let expected = Mask::from_indices(5, vec![]);
        assert_eq!(this.intersect_by_rank(&mask), expected);
    }
}

0 commit comments

Comments
 (0)