@@ -23,6 +23,7 @@ use std::fmt::{self, Debug};
2323use std:: ops:: Sub ;
2424
2525use arrow:: datatypes:: ArrowNativeType ;
26+ use arrow:: util:: bit_util;
2627use hashbrown:: HashTable ;
2728use hashbrown:: hash_table:: Entry :: { Occupied , Vacant } ;
2829
@@ -124,6 +125,9 @@ pub trait JoinHashMapType: Send + Sync {
124125 match_indices : & mut Vec < u64 > ,
125126 ) -> Option < JoinHashMapOffset > ;
126127
128+ /// Sets bit `i` of `buffer` for every `hash_values[i]` whose hash exists in the map.
///
/// NOTE(review): the implementations visible in this file only ever set bits (never
/// clear them) and match on the stored hash value alone, not on the join key. So
/// `buffer` presumably has to be zero-initialized by the caller and hold at least
/// `hash_values.len()` bits, and a set bit means "hash present" rather than
/// "key equal" — confirm against the callers.
129+ fn set_bits_if_exists ( & self , hash_values : & [ u64 ] , buffer : & mut [ u8 ] ) ;
130+
127131 /// Returns `true` if the join hash map contains no entries.
128132 fn is_empty ( & self ) -> bool ;
129133
@@ -196,6 +200,10 @@ impl JoinHashMapType for JoinHashMapU32 {
196200 )
197201 }
198202
203+ fn set_bits_if_exists ( & self , hash_values : & [ u64 ] , buffer : & mut [ u8 ] ) {
204+ set_bits_if_exists :: < u32 > ( & self . map , hash_values, buffer) ;
205+ }
206+
/// Returns whatever `self.map.is_empty()` reports, i.e. whether the underlying table has no entries.
199207 fn is_empty ( & self ) -> bool {
200208 self . map . is_empty ( )
201209 }
@@ -270,6 +278,10 @@ impl JoinHashMapType for JoinHashMapU64 {
270278 )
271279 }
272280
281+ fn set_bits_if_exists ( & self , hash_values : & [ u64 ] , buffer : & mut [ u8 ] ) {
282+ set_bits_if_exists :: < u64 > ( & self . map , hash_values, buffer) ;
283+ }
284+
/// Returns whatever `self.map.is_empty()` reports, i.e. whether the underlying table has no entries.
273285 fn is_empty ( & self ) -> bool {
274286 self . map . is_empty ( )
275287 }
@@ -496,3 +508,46 @@ where
496508 }
497509 None
498510}
511+
512+ pub fn set_bits_if_exists < T > (
513+ map : & HashTable < ( u64 , T ) > ,
514+ hash_values : & [ u64 ] ,
515+ buffer : & mut [ u8 ] ,
516+ ) {
517+ for ( i, & hash) in hash_values. iter ( ) . enumerate ( ) {
518+ if map. find ( hash, |( h, _) | hash == * h) . is_some ( ) {
519+ bit_util:: set_bit ( buffer, i) ;
520+ }
521+ }
522+ }
523+
#[cfg(test)]
mod tests {
    use super::*;

    /// Probing with a mix of present and absent hashes must set exactly the
    /// bits whose probe hash was inserted on the build side.
    #[test]
    fn test_set_bits_if_exists() {
        // Build side: hashes 10, 20 and 30.
        let mut hash_map = JoinHashMapU32::with_capacity(10);
        hash_map.update_from_iter(
            Box::new([10u64, 20u64, 30u64].iter().enumerate()),
            0,
        );

        // Probe side: alternate between hashes that exist and ones that do not.
        let probe_hashes = [10u64, 11, 20, 21, 30, 31];
        // Six probes fit into a single zeroed byte.
        let mut buffer = [0u8; 1];
        hash_map.set_bits_if_exists(&probe_hashes, &mut buffer);

        for (idx, hash) in probe_hashes.iter().copied().enumerate() {
            let is_set = bit_util::get_bit(&buffer, idx);
            match hash {
                10 | 20 | 30 => assert!(is_set, "Hash {hash} should exist in the map"),
                _ => assert!(!is_set, "Hash {hash} should NOT exist in the map"),
            }
        }
    }
}
0 commit comments