Skip to content

Commit cbf93fd

Browse files
authored
Mooooaarrrrr generics (#5367)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 48a528d commit cbf93fd

File tree

11 files changed

+182
-184
lines changed

11 files changed

+182
-184
lines changed

vortex-buffer/src/bit/buf.rs

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::bit::{
88
BitChunks, BitIndexIterator, BitIterator, BitSliceIterator, UnalignedBitChunk,
99
get_bit_unchecked,
1010
};
11-
use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer};
11+
use crate::{Alignment, BitBufferMut, Buffer, ByteBuffer, buffer};
1212

1313
/// An immutable bitset stored as a packed byte buffer.
1414
#[derive(Debug, Clone, Eq)]
@@ -114,37 +114,9 @@ impl BitBuffer {
114114
}
115115
}
116116

117-
/// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BitBuffer`
118-
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
119-
let mut buffer = BufferMut::with_capacity(len.div_ceil(64) * 8);
120-
121-
let chunks = len / 64;
122-
let remainder = len % 64;
123-
for chunk in 0..chunks {
124-
let mut packed = 0;
125-
for bit_idx in 0..64 {
126-
let i = bit_idx + chunk * 64;
127-
packed |= (f(i) as u64) << bit_idx;
128-
}
129-
130-
// SAFETY: Already allocated sufficient capacity
131-
unsafe { buffer.push_unchecked(packed) }
132-
}
133-
134-
if remainder != 0 {
135-
let mut packed = 0;
136-
for bit_idx in 0..remainder {
137-
let i = bit_idx + chunks * 64;
138-
packed |= (f(i) as u64) << bit_idx;
139-
}
140-
141-
// SAFETY: Already allocated sufficient capacity
142-
unsafe { buffer.push_unchecked(packed) }
143-
}
144-
145-
buffer.truncate(len.div_ceil(8));
146-
147-
Self::new(buffer.freeze().into_byte_buffer(), len)
117+
/// Invokes `f` with indexes `0..len` collecting the boolean results into a new [`BitBuffer`].
118+
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
119+
BitBufferMut::collect_bool(len, f).freeze()
148120
}
149121

150122
/// Get the logical length of this `BoolBuffer`.

vortex-buffer/src/bit/buf_mut.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,43 @@ impl BitBufferMut {
117117
}
118118
}
119119

120+
/// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BitBufferMut`
121+
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
122+
let mut buffer = BufferMut::with_capacity(len.div_ceil(64) * 8);
123+
124+
let chunks = len / 64;
125+
let remainder = len % 64;
126+
for chunk in 0..chunks {
127+
let mut packed = 0;
128+
for bit_idx in 0..64 {
129+
let i = bit_idx + chunk * 64;
130+
packed |= (f(i) as u64) << bit_idx;
131+
}
132+
133+
// SAFETY: Already allocated sufficient capacity
134+
unsafe { buffer.push_unchecked(packed) }
135+
}
136+
137+
if remainder != 0 {
138+
let mut packed = 0;
139+
for bit_idx in 0..remainder {
140+
let i = bit_idx + chunks * 64;
141+
packed |= (f(i) as u64) << bit_idx;
142+
}
143+
144+
// SAFETY: Already allocated sufficient capacity
145+
unsafe { buffer.push_unchecked(packed) }
146+
}
147+
148+
buffer.truncate(len.div_ceil(8));
149+
150+
Self {
151+
buffer: buffer.into_byte_buffer(),
152+
offset: 0,
153+
len,
154+
}
155+
}
156+
120157
/// Return the underlying byte buffer.
121158
pub fn inner(&self) -> &ByteBufferMut {
122159
&self.buffer

vortex-buffer/src/buffer_mut.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,16 @@ impl<T> BufferMut<T> {
110110
buffer
111111
}
112112

113+
/// Return the [`ByteBufferMut`] for this [`BufferMut<T>`].
114+
pub fn into_byte_buffer(self) -> ByteBufferMut {
115+
ByteBufferMut {
116+
bytes: self.bytes,
117+
length: self.length * size_of::<T>(),
118+
alignment: self.alignment,
119+
_marker: Default::default(),
120+
}
121+
}
122+
113123
/// Get the alignment of the buffer.
114124
#[inline(always)]
115125
pub fn alignment(&self) -> Alignment {

vortex-compute/src/filter/bitbuffer.rs

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex_buffer::{BitBuffer, BitBufferMut, get_bit};
5-
use vortex_mask::{Mask, MaskIter};
5+
use vortex_mask::Mask;
66

77
use crate::filter::{Filter, MaskIndices};
88

9-
/// If the filter density is above 80%, we use slices to filter the array instead of indices.
10-
// TODO(ngates): we need more experimentation to determine the best threshold here.
11-
const FILTER_SLICES_DENSITY_THRESHOLD: f64 = 0.8;
12-
139
impl Filter<Mask> for &BitBuffer {
1410
type Output = BitBuffer;
1511

@@ -23,12 +19,29 @@ impl Filter<Mask> for &BitBuffer {
2319
match selection_mask {
2420
Mask::AllTrue(_) => self.clone(),
2521
Mask::AllFalse(_) => BitBuffer::empty(),
26-
Mask::Values(v) => match v.threshold_iter(FILTER_SLICES_DENSITY_THRESHOLD) {
27-
MaskIter::Indices(indices) => filter_indices(self, indices),
28-
MaskIter::Slices(slices) => {
29-
filter_slices(self, selection_mask.true_count(), slices)
30-
}
31-
},
22+
Mask::Values(v) => {
23+
filter_indices(self.inner().as_ref(), self.offset(), v.indices()).freeze()
24+
}
25+
}
26+
}
27+
}
28+
29+
impl Filter<Mask> for &mut BitBufferMut {
30+
type Output = ();
31+
32+
fn filter(self, selection_mask: &Mask) {
33+
assert_eq!(
34+
selection_mask.len(),
35+
self.len(),
36+
"Selection mask length must equal the mask length"
37+
);
38+
39+
match selection_mask {
40+
Mask::AllTrue(_) => {}
41+
Mask::AllFalse(_) => self.clear(),
42+
Mask::Values(v) => {
43+
*self = filter_indices(self.inner().as_slice(), self.offset(), v.indices())
44+
}
3245
}
3346
}
3447
}
@@ -37,25 +50,24 @@ impl Filter<MaskIndices<'_>> for &BitBuffer {
3750
type Output = BitBuffer;
3851

3952
fn filter(self, indices: &MaskIndices) -> BitBuffer {
40-
filter_indices(self, indices)
53+
filter_indices(self.inner().as_ref(), self.offset(), indices).freeze()
4154
}
4255
}
4356

44-
fn filter_indices(bools: &BitBuffer, indices: &[usize]) -> BitBuffer {
45-
let buffer = bools.inner().as_ref();
46-
BitBuffer::collect_bool(indices.len(), |idx| {
47-
let idx = *unsafe { indices.get_unchecked(idx) };
48-
get_bit(buffer, bools.offset() + idx)
49-
})
50-
}
57+
impl Filter<MaskIndices<'_>> for &mut BitBufferMut {
58+
type Output = ();
5159

52-
fn filter_slices(buffer: &BitBuffer, output_len: usize, slices: &[(usize, usize)]) -> BitBuffer {
53-
let mut builder = BitBufferMut::with_capacity(output_len);
54-
for (start, end) in slices {
55-
// TODO(ngates): we probably want a borrowed slice for things like this.
56-
builder.append_buffer(&buffer.slice(*start..*end));
60+
fn filter(self, indices: &MaskIndices) {
61+
*self = filter_indices(self.inner().as_ref(), self.offset(), indices)
5762
}
58-
builder.freeze()
63+
}
64+
65+
fn filter_indices(bools: &[u8], bit_offset: usize, indices: &[usize]) -> BitBufferMut {
66+
// FIXME(ngates): this is slower than it could be!
67+
BitBufferMut::collect_bool(indices.len(), |idx| {
68+
let idx = *unsafe { indices.get_unchecked(idx) };
69+
get_bit(bools, bit_offset + idx)
70+
})
5971
}
6072

6173
#[cfg(test)]
@@ -64,20 +76,10 @@ mod test {
6476

6577
use super::*;
6678

67-
#[test]
68-
fn filter_bool_by_slice_test() {
69-
let bits = bitbuffer![1 1 0];
70-
71-
let filtered = filter_slices(&bits, 2, &[(0, 1), (2, 3)]);
72-
assert_eq!(2, filtered.len());
73-
74-
assert_eq!(filtered, bitbuffer![1 0])
75-
}
76-
7779
#[test]
7880
fn filter_bool_by_index_test() {
7981
let buf = bitbuffer![1 1 0];
80-
let filtered = filter_indices(&buf, &[0, 2]);
82+
let filtered = filter_indices(buf.inner().as_ref(), 0, &[0, 2]).freeze();
8183
assert_eq!(2, filtered.len());
8284
assert_eq!(filtered, bitbuffer![1 0])
8385
}

vortex-compute/src/filter/slice.rs

Whitespace-only changes.

vortex-compute/src/filter/vector/bool.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex_buffer::BitBuffer;
4+
use vortex_buffer::{BitBuffer, BitBufferMut};
55
use vortex_mask::{Mask, MaskMut};
66
use vortex_vector::VectorOps;
77
use vortex_vector::bool::{BoolVector, BoolVectorMut};
@@ -28,18 +28,15 @@ where
2828

2929
impl<M> Filter<M> for &mut BoolVectorMut
3030
where
31-
for<'a> &'a BitBuffer: Filter<M, Output = BitBuffer>,
31+
for<'a> &'a mut BitBufferMut: Filter<M, Output = ()>,
3232
for<'a> &'a mut MaskMut: Filter<M, Output = ()>,
3333
{
3434
type Output = ();
3535

3636
fn filter(self, selection: &M) -> Self::Output {
3737
// TODO(aduffy): how can we do this faster in-place?
38-
unsafe {
39-
let bits = self.bits_mut();
40-
*bits = (*bits).clone().freeze().filter(selection).into_mut();
41-
self.validity_mut().filter(selection);
42-
}
38+
unsafe { self.bits_mut().filter(selection) };
39+
unsafe { self.validity_mut().filter(selection) };
4340
}
4441
}
4542

vortex-compute/src/filter/vector/decimal.rs

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,40 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex_mask::Mask;
5-
use vortex_vector::decimal::{DecimalVector, DecimalVectorMut};
4+
use vortex_dtype::i256;
5+
use vortex_vector::decimal::{DVector, DVectorMut, DecimalVector, DecimalVectorMut};
66
use vortex_vector::{match_each_dvector, match_each_dvector_mut};
77

8-
use crate::filter::{Filter, MaskIndices};
9-
10-
impl Filter<Mask> for &DecimalVector {
11-
type Output = DecimalVector;
12-
13-
fn filter(self, selection: &Mask) -> Self::Output {
14-
match_each_dvector!(self, |d| { d.filter(selection).into() })
15-
}
16-
}
17-
18-
impl Filter<MaskIndices<'_>> for &DecimalVector {
8+
use crate::filter::Filter;
9+
10+
impl<M> Filter<M> for &DecimalVector
11+
where
12+
for<'a> &'a DVector<i8>: Filter<M, Output = DVector<i8>>,
13+
for<'a> &'a DVector<i16>: Filter<M, Output = DVector<i16>>,
14+
for<'a> &'a DVector<i32>: Filter<M, Output = DVector<i32>>,
15+
for<'a> &'a DVector<i64>: Filter<M, Output = DVector<i64>>,
16+
for<'a> &'a DVector<i128>: Filter<M, Output = DVector<i128>>,
17+
for<'a> &'a DVector<i256>: Filter<M, Output = DVector<i256>>,
18+
{
1919
type Output = DecimalVector;
2020

21-
fn filter(self, selection: &MaskIndices) -> Self::Output {
21+
fn filter(self, selection: &M) -> Self::Output {
2222
match_each_dvector!(self, |d| { d.filter(selection).into() })
2323
}
2424
}
2525

26-
impl Filter<Mask> for &mut DecimalVectorMut {
27-
type Output = ();
28-
29-
fn filter(self, selection: &Mask) -> Self::Output {
30-
match_each_dvector_mut!(self, |d| { d.filter(selection) });
31-
}
32-
}
33-
34-
impl Filter<MaskIndices<'_>> for &mut DecimalVectorMut {
26+
impl<M> Filter<M> for &mut DecimalVectorMut
27+
where
28+
for<'a> &'a mut DVectorMut<i8>: Filter<M, Output = ()>,
29+
for<'a> &'a mut DVectorMut<i16>: Filter<M, Output = ()>,
30+
for<'a> &'a mut DVectorMut<i32>: Filter<M, Output = ()>,
31+
for<'a> &'a mut DVectorMut<i64>: Filter<M, Output = ()>,
32+
for<'a> &'a mut DVectorMut<i128>: Filter<M, Output = ()>,
33+
for<'a> &'a mut DVectorMut<i256>: Filter<M, Output = ()>,
34+
{
3535
type Output = ();
3636

37-
fn filter(self, selection: &MaskIndices) -> Self::Output {
37+
fn filter(self, selection: &M) -> Self::Output {
3838
match_each_dvector_mut!(self, |d| { d.filter(selection) });
3939
}
4040
}
@@ -48,6 +48,7 @@ mod tests {
4848
use vortex_vector::{VectorMutOps, VectorOps};
4949

5050
use super::*;
51+
use crate::filter::MaskIndices;
5152

5253
#[test]
5354
fn test_filter_decimal_vector_with_mask() {

vortex-compute/src/filter/vector/fixed_size_list.rs

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,26 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex_mask::Mask;
54
use vortex_vector::fixed_size_list::{FixedSizeListVector, FixedSizeListVectorMut};
65

7-
use crate::filter::{Filter, MaskIndices};
6+
use crate::filter::Filter;
87

98
// TODO(aduffy): there really isn't a cheap way to implement these is there.
109

11-
impl Filter<Mask> for &FixedSizeListVector {
10+
impl<M> Filter<M> for &FixedSizeListVector {
1211
type Output = FixedSizeListVector;
1312

14-
fn filter(self, _selection: &Mask) -> Self::Output {
13+
fn filter(self, _selection: &M) -> Self::Output {
1514
// We need to spread the mask out to point to offsets from
1615
// the inner vector type
1716
todo!()
1817
}
1918
}
2019

21-
impl Filter<MaskIndices<'_>> for &FixedSizeListVector {
22-
type Output = FixedSizeListVector;
23-
24-
fn filter(self, _selection: &MaskIndices) -> Self::Output {
25-
// We need to spread the mask out to point to offsets from
26-
// the inner vector type
27-
todo!()
28-
}
29-
}
30-
31-
impl Filter<Mask> for &mut FixedSizeListVectorMut {
32-
type Output = ();
33-
34-
fn filter(self, _selection: &Mask) -> Self::Output {
35-
// We need to spread the mask out to point to offsets from
36-
// the inner vector type
37-
todo!()
38-
}
39-
}
40-
41-
impl Filter<MaskIndices<'_>> for &mut FixedSizeListVectorMut {
20+
impl<M> Filter<M> for &mut FixedSizeListVectorMut {
4221
type Output = ();
4322

44-
fn filter(self, _selection: &MaskIndices) -> Self::Output {
23+
fn filter(self, _selection: &M) -> Self::Output {
4524
// We need to spread the mask out to point to offsets from
4625
// the inner vector type
4726
todo!()

0 commit comments

Comments
 (0)