Skip to content

Commit c5ab12e

Browse files
authored
fix: Validity export using shifted u64 copy (#4244)
Signed-off-by: Robert Kruszewski <[email protected]>
1 parent 7dc04c3 commit c5ab12e

File tree

1 file changed

+179
-9
lines changed
  • vortex-duckdb/src/exporter

1 file changed

+179
-9
lines changed

vortex-duckdb/src/exporter/mod.rs

Lines changed: 179 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ mod varbinview;
1515

1616
use std::sync::Arc;
1717

18+
use bitvec::prelude::Lsb0;
19+
use bitvec::view::BitView;
1820
pub use cache::ConversionCache;
1921
pub use decimal::precision_to_duckdb_storage_size;
2022
use itertools::Itertools;
@@ -208,18 +210,186 @@ impl VectorExt for Vector {
208210
true
209211
}
210212
Mask::Values(arr) => {
211-
// TODO(joe): do this MUCH better, with a shifted u64 copy
212-
let mut null_count = 0;
213-
// SAFETY: Caller guaranteees this.
214-
let validity = unsafe { self.ensure_validity_slice(len) };
215-
for (idx, v) in arr.boolean_buffer().slice(offset, len).iter().enumerate() {
216-
if !v {
217-
validity.set(idx, false);
218-
null_count += 1;
213+
let arr_bits: &[u64] = {
214+
let byte_slice = arr.boolean_buffer().inner().as_slice();
215+
unsafe {
216+
std::slice::from_raw_parts(
217+
byte_slice.as_ptr() as _,
218+
byte_slice.len().div_ceil(size_of::<u64>()),
219+
)
220+
}
221+
};
222+
let sliced_bits = &arr_bits.view_bits::<Lsb0>()[offset..][..len];
223+
let true_count = sliced_bits.count_ones();
224+
if true_count == len {
225+
if let Some(validity) = unsafe { self.validity_slice_mut(len) } {
226+
validity.fill(true);
219227
}
228+
} else if true_count == 0 {
229+
unsafe { self.ensure_validity_slice(len) }.fill(false);
230+
} else {
231+
// SAFETY: Caller guaranteees this.
232+
let validity = unsafe { self.ensure_validity_slice(len) };
233+
validity[..len].copy_from_bitslice(sliced_bits);
220234
}
221-
null_count == len
235+
true_count == 0
222236
}
223237
}
224238
}
225239
}
240+
241+
#[cfg(test)]
242+
mod tests {
243+
use arrow_buffer::buffer::BooleanBuffer;
244+
use vortex::mask::Mask;
245+
246+
use super::VectorExt;
247+
use crate::cpp::DUCKDB_TYPE;
248+
use crate::duckdb::{LogicalType, Vector};
249+
250+
#[test]
251+
fn test_set_validity_all_true() {
252+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
253+
let mut vector = Vector::with_capacity(logical_type, 100);
254+
255+
let mask = Mask::AllTrue(10);
256+
let all_null = unsafe { vector.set_validity(&mask, 0, 10) };
257+
258+
assert!(!all_null);
259+
}
260+
261+
#[test]
262+
fn test_set_validity_all_false() {
263+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
264+
let mut vector = Vector::with_capacity(logical_type, 100);
265+
266+
let mask = Mask::AllFalse(10);
267+
let all_null = unsafe { vector.set_validity(&mask, 0, 10) };
268+
269+
assert!(all_null);
270+
271+
let validity = unsafe { vector.validity_slice_mut(10).unwrap() };
272+
assert!(validity.iter().all(|v| !v));
273+
}
274+
275+
#[test]
276+
fn test_set_validity_values_all_true() {
277+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
278+
let mut vector = Vector::with_capacity(logical_type, 100);
279+
280+
let bits = vec![true; 10];
281+
let buffer = BooleanBuffer::from(bits.as_slice());
282+
let mask = Mask::from(buffer);
283+
284+
let all_null = unsafe { vector.set_validity(&mask, 0, 10) };
285+
286+
assert!(!all_null);
287+
288+
// When all values are true, the mask may be optimized to AllTrue,
289+
// so validity_slice_mut may return None (no validity allocated)
290+
if let Some(validity) = unsafe { vector.validity_slice_mut(10) } {
291+
assert!(validity.iter().all(|v| *v));
292+
}
293+
}
294+
295+
#[test]
296+
fn test_set_validity_values_all_false() {
297+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
298+
let mut vector = Vector::with_capacity(logical_type, 100);
299+
300+
let bits = vec![false; 10];
301+
let buffer = BooleanBuffer::from(bits.as_slice());
302+
let mask = Mask::from(buffer);
303+
304+
let all_null = unsafe { vector.set_validity(&mask, 0, 10) };
305+
306+
assert!(all_null);
307+
308+
let validity = unsafe { vector.validity_slice_mut(10).unwrap() };
309+
assert!(validity.iter().all(|v| !v));
310+
}
311+
312+
#[test]
313+
fn test_set_validity_values_mixed() {
314+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
315+
let mut vector = Vector::with_capacity(logical_type, 100);
316+
317+
let bits = vec![
318+
true, false, true, true, false, false, true, true, false, true,
319+
];
320+
let buffer = BooleanBuffer::from(bits.as_slice());
321+
let mask = Mask::from(buffer);
322+
323+
let all_null = unsafe { vector.set_validity(&mask, 0, 10) };
324+
325+
assert!(!all_null);
326+
327+
let validity = unsafe { vector.validity_slice_mut(10).unwrap() };
328+
for (i, bit) in bits.iter().enumerate() {
329+
assert_eq!(validity[i], *bit);
330+
}
331+
}
332+
333+
#[test]
334+
fn test_set_validity_values_with_offset() {
335+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
336+
let mut vector = Vector::with_capacity(logical_type, 100);
337+
338+
let bits = vec![
339+
false, false, true, true, false, true, false, true, true, false, true, true, false,
340+
];
341+
let buffer = BooleanBuffer::from(bits.as_slice());
342+
let mask = Mask::from(buffer);
343+
344+
let all_null = unsafe { vector.set_validity(&mask, 2, 8) };
345+
346+
assert!(!all_null);
347+
348+
let validity = unsafe { vector.validity_slice_mut(8).unwrap() };
349+
for i in 0..8 {
350+
assert_eq!(validity[i], bits[i + 2]);
351+
}
352+
}
353+
354+
#[test]
355+
fn test_set_validity_values_with_offset_and_smaller_len() {
356+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
357+
let mut vector = Vector::with_capacity(logical_type, 100);
358+
359+
let bits = vec![
360+
true, false, true, true, false, false, true, true, false, true, true, true, false,
361+
true, false,
362+
];
363+
let buffer = BooleanBuffer::from(bits.as_slice());
364+
let mask = Mask::from(buffer);
365+
366+
let all_null = unsafe { vector.set_validity(&mask, 3, 5) };
367+
368+
assert!(!all_null);
369+
370+
let validity = unsafe { vector.validity_slice_mut(5).unwrap() };
371+
for i in 0..5 {
372+
assert_eq!(validity[i], bits[i + 3]);
373+
}
374+
}
375+
376+
#[test]
377+
fn test_set_validity_values_64bit_alignment() {
378+
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT);
379+
let mut vector = Vector::with_capacity(logical_type, 100);
380+
381+
let bits = (0..70).map(|i| i % 3 == 0).collect::<Vec<_>>();
382+
383+
let buffer = BooleanBuffer::from(bits.as_slice());
384+
let mask = Mask::from(buffer);
385+
386+
let all_null = unsafe { vector.set_validity(&mask, 5, 60) };
387+
388+
assert!(!all_null);
389+
390+
let validity = unsafe { vector.validity_slice_mut(60).unwrap() };
391+
for i in 0..60 {
392+
assert_eq!(validity[i], bits[i + 5]);
393+
}
394+
}
395+
}

0 commit comments

Comments
 (0)