@@ -15,6 +15,8 @@ mod varbinview;
1515
1616use std:: sync:: Arc ;
1717
18+ use bitvec:: prelude:: Lsb0 ;
19+ use bitvec:: view:: BitView ;
1820pub use cache:: ConversionCache ;
1921pub use decimal:: precision_to_duckdb_storage_size;
2022use itertools:: Itertools ;
@@ -208,18 +210,186 @@ impl VectorExt for Vector {
208210 true
209211 }
210212 Mask :: Values ( arr) => {
211- // TODO(joe): do this MUCH better, with a shifted u64 copy
212- let mut null_count = 0 ;
213- // SAFETY: Caller guaranteees this.
214- let validity = unsafe { self . ensure_validity_slice ( len) } ;
215- for ( idx, v) in arr. boolean_buffer ( ) . slice ( offset, len) . iter ( ) . enumerate ( ) {
216- if !v {
217- validity. set ( idx, false ) ;
218- null_count += 1 ;
213+ let arr_bits: & [ u64 ] = {
214+ let byte_slice = arr. boolean_buffer ( ) . inner ( ) . as_slice ( ) ;
215+ unsafe {
216+ std:: slice:: from_raw_parts (
217+ byte_slice. as_ptr ( ) as _ ,
218+ byte_slice. len ( ) . div_ceil ( size_of :: < u64 > ( ) ) ,
219+ )
220+ }
221+ } ;
222+ let sliced_bits = & arr_bits. view_bits :: < Lsb0 > ( ) [ offset..] [ ..len] ;
223+ let true_count = sliced_bits. count_ones ( ) ;
224+ if true_count == len {
225+ if let Some ( validity) = unsafe { self . validity_slice_mut ( len) } {
226+ validity. fill ( true ) ;
219227 }
228+ } else if true_count == 0 {
229+ unsafe { self . ensure_validity_slice ( len) } . fill ( false ) ;
230+ } else {
231+ // SAFETY: Caller guaranteees this.
232+ let validity = unsafe { self . ensure_validity_slice ( len) } ;
233+ validity[ ..len] . copy_from_bitslice ( sliced_bits) ;
220234 }
221- null_count == len
235+ true_count == 0
222236 }
223237 }
224238 }
225239}
240+
241+ #[ cfg( test) ]
242+ mod tests {
243+ use arrow_buffer:: buffer:: BooleanBuffer ;
244+ use vortex:: mask:: Mask ;
245+
246+ use super :: VectorExt ;
247+ use crate :: cpp:: DUCKDB_TYPE ;
248+ use crate :: duckdb:: { LogicalType , Vector } ;
249+
250+ #[ test]
251+ fn test_set_validity_all_true ( ) {
252+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
253+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
254+
255+ let mask = Mask :: AllTrue ( 10 ) ;
256+ let all_null = unsafe { vector. set_validity ( & mask, 0 , 10 ) } ;
257+
258+ assert ! ( !all_null) ;
259+ }
260+
261+ #[ test]
262+ fn test_set_validity_all_false ( ) {
263+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
264+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
265+
266+ let mask = Mask :: AllFalse ( 10 ) ;
267+ let all_null = unsafe { vector. set_validity ( & mask, 0 , 10 ) } ;
268+
269+ assert ! ( all_null) ;
270+
271+ let validity = unsafe { vector. validity_slice_mut ( 10 ) . unwrap ( ) } ;
272+ assert ! ( validity. iter( ) . all( |v| !v) ) ;
273+ }
274+
275+ #[ test]
276+ fn test_set_validity_values_all_true ( ) {
277+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
278+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
279+
280+ let bits = vec ! [ true ; 10 ] ;
281+ let buffer = BooleanBuffer :: from ( bits. as_slice ( ) ) ;
282+ let mask = Mask :: from ( buffer) ;
283+
284+ let all_null = unsafe { vector. set_validity ( & mask, 0 , 10 ) } ;
285+
286+ assert ! ( !all_null) ;
287+
288+ // When all values are true, the mask may be optimized to AllTrue,
289+ // so validity_slice_mut may return None (no validity allocated)
290+ if let Some ( validity) = unsafe { vector. validity_slice_mut ( 10 ) } {
291+ assert ! ( validity. iter( ) . all( |v| * v) ) ;
292+ }
293+ }
294+
295+ #[ test]
296+ fn test_set_validity_values_all_false ( ) {
297+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
298+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
299+
300+ let bits = vec ! [ false ; 10 ] ;
301+ let buffer = BooleanBuffer :: from ( bits. as_slice ( ) ) ;
302+ let mask = Mask :: from ( buffer) ;
303+
304+ let all_null = unsafe { vector. set_validity ( & mask, 0 , 10 ) } ;
305+
306+ assert ! ( all_null) ;
307+
308+ let validity = unsafe { vector. validity_slice_mut ( 10 ) . unwrap ( ) } ;
309+ assert ! ( validity. iter( ) . all( |v| !v) ) ;
310+ }
311+
312+ #[ test]
313+ fn test_set_validity_values_mixed ( ) {
314+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
315+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
316+
317+ let bits = vec ! [
318+ true , false , true , true , false , false , true , true , false , true ,
319+ ] ;
320+ let buffer = BooleanBuffer :: from ( bits. as_slice ( ) ) ;
321+ let mask = Mask :: from ( buffer) ;
322+
323+ let all_null = unsafe { vector. set_validity ( & mask, 0 , 10 ) } ;
324+
325+ assert ! ( !all_null) ;
326+
327+ let validity = unsafe { vector. validity_slice_mut ( 10 ) . unwrap ( ) } ;
328+ for ( i, bit) in bits. iter ( ) . enumerate ( ) {
329+ assert_eq ! ( validity[ i] , * bit) ;
330+ }
331+ }
332+
333+ #[ test]
334+ fn test_set_validity_values_with_offset ( ) {
335+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
336+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
337+
338+ let bits = vec ! [
339+ false , false , true , true , false , true , false , true , true , false , true , true , false ,
340+ ] ;
341+ let buffer = BooleanBuffer :: from ( bits. as_slice ( ) ) ;
342+ let mask = Mask :: from ( buffer) ;
343+
344+ let all_null = unsafe { vector. set_validity ( & mask, 2 , 8 ) } ;
345+
346+ assert ! ( !all_null) ;
347+
348+ let validity = unsafe { vector. validity_slice_mut ( 8 ) . unwrap ( ) } ;
349+ for i in 0 ..8 {
350+ assert_eq ! ( validity[ i] , bits[ i + 2 ] ) ;
351+ }
352+ }
353+
354+ #[ test]
355+ fn test_set_validity_values_with_offset_and_smaller_len ( ) {
356+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
357+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
358+
359+ let bits = vec ! [
360+ true , false , true , true , false , false , true , true , false , true , true , true , false ,
361+ true , false ,
362+ ] ;
363+ let buffer = BooleanBuffer :: from ( bits. as_slice ( ) ) ;
364+ let mask = Mask :: from ( buffer) ;
365+
366+ let all_null = unsafe { vector. set_validity ( & mask, 3 , 5 ) } ;
367+
368+ assert ! ( !all_null) ;
369+
370+ let validity = unsafe { vector. validity_slice_mut ( 5 ) . unwrap ( ) } ;
371+ for i in 0 ..5 {
372+ assert_eq ! ( validity[ i] , bits[ i + 3 ] ) ;
373+ }
374+ }
375+
376+ #[ test]
377+ fn test_set_validity_values_64bit_alignment ( ) {
378+ let logical_type = LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_BIGINT ) ;
379+ let mut vector = Vector :: with_capacity ( logical_type, 100 ) ;
380+
381+ let bits = ( 0 ..70 ) . map ( |i| i % 3 == 0 ) . collect :: < Vec < _ > > ( ) ;
382+
383+ let buffer = BooleanBuffer :: from ( bits. as_slice ( ) ) ;
384+ let mask = Mask :: from ( buffer) ;
385+
386+ let all_null = unsafe { vector. set_validity ( & mask, 5 , 60 ) } ;
387+
388+ assert ! ( !all_null) ;
389+
390+ let validity = unsafe { vector. validity_slice_mut ( 60 ) . unwrap ( ) } ;
391+ for i in 0 ..60 {
392+ assert_eq ! ( validity[ i] , bits[ i + 5 ] ) ;
393+ }
394+ }
395+ }
0 commit comments