Skip to content

Commit 1eec72e

Browse files
authored
feat: prefer take to filter for very sparse masks (#1249)
Fixes #1248.

It seems to me that if we have fewer than one-in-1024 values selected, there is no chance for SIMD vectorization to make the mask (filter) as fast as a take. Mild improvement.

```
group                    develop               layouts               take-one-in-1024      take-one-in-128       take-one-in-16        always-take
-----                    -------               -------               ----------------      ---------------       --------------        -----------
vortex-local-fs          1.00  381.3±10.78µs   34.91  13.3±0.13ms    29.49  11.2±0.11ms    29.63  11.3±0.09ms    29.75  11.3±0.13ms    29.71  11.3±0.12ms
vortex-tokio-local-disk  1.00  349.5±12.64µs   29.46  10.3±0.14ms    24.23  8.5±0.15ms     24.73  8.6±0.09ms     24.80  8.7±0.12ms     24.85  8.7±0.18ms
```
1 parent 520ffc4 commit 1eec72e

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

vortex-serde/src/layouts/read/mask.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@ use std::fmt::{Display, Formatter};
33

44
use arrow_buffer::{BooleanBuffer, MutableBuffer};
55
use croaring::Bitmap;
6-
use vortex_array::array::BoolArray;
7-
use vortex_array::compute::{filter, slice};
6+
use vortex_array::array::{BoolArray, PrimitiveArray};
7+
use vortex_array::compute::{filter, slice, take};
88
use vortex_array::validity::Validity;
99
use vortex_array::{iterate_integer_array, Array, IntoArray};
1010
use vortex_dtype::PType;
1111
use vortex_error::{vortex_bail, vortex_err, VortexResult};
1212

13+
const PREFER_TAKE_TO_FILTER_DENSITY: f64 = 1.0 / 1024.0;
14+
1315
/// Bitmap of selected rows within given [begin, end) row range
1416
#[derive(Debug, Clone, Default, PartialEq, Eq)]
1517
pub struct RowMask {
@@ -158,12 +160,20 @@ impl RowMask {
158160
return Ok(Some(sliced.clone()));
159161
}
160162

161-
let predicate = self.to_predicate_array()?;
163+
if (true_count as f64 / sliced.len() as f64) < PREFER_TAKE_TO_FILTER_DENSITY {
164+
let indices = self.to_indices_array()?;
165+
take(sliced, indices).map(Some)
166+
} else {
167+
let mask = self.to_mask_array()?;
168+
filter(sliced, mask).map(Some)
169+
}
170+
}
162171

163-
filter(sliced, predicate).map(Some)
172+
pub fn to_indices_array(&self) -> VortexResult<Array> {
173+
Ok(PrimitiveArray::from_vec(self.values.to_vec(), Validity::NonNullable).into_array())
164174
}
165175

166-
pub fn to_predicate_array(&self) -> VortexResult<Array> {
176+
pub fn to_mask_array(&self) -> VortexResult<Array> {
167177
let bitset = self
168178
.values
169179
.to_bitset()

vortex-serde/src/layouts/read/stream.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ impl<R: VortexReadAt + Unpin + 'static> Stream for LayoutBatchStream<R> {
185185
if let Some(row_mask) = &self.row_mask {
186186
batch = and(
187187
batch,
188-
row_mask.slice(sel_begin, sel_end).to_predicate_array()?,
188+
row_mask.slice(sel_begin, sel_end).to_mask_array()?,
189189
)?;
190190
}
191191

0 commit comments

Comments
 (0)