@@ -54,12 +54,13 @@ use datafusion_physical_expr::utils::{collect_columns, merge_vectors};
5454use datafusion_physical_expr:: {
5555 LexOrdering , PhysicalExpr , PhysicalExprRef , PhysicalSortExpr ,
5656} ;
57+ use hashbrown:: hash_table:: Entry :: { Occupied , Vacant } ;
58+ use hashbrown:: HashTable ;
5759
5860use crate :: joins:: SharedBitmapBuilder ;
5961use crate :: projection:: ProjectionExec ;
6062use futures:: future:: { BoxFuture , Shared } ;
6163use futures:: { ready, FutureExt } ;
62- use hashbrown:: raw:: RawTable ;
6364use parking_lot:: Mutex ;
6465
6566/// Maps a `u64` hash value based on the build side ["on" values] to a list of indices with this key's value.
@@ -126,20 +127,20 @@ use parking_lot::Mutex;
126127/// ```
127128pub struct JoinHashMap {
128129 // Stores hash value to last row index
129- map : RawTable < ( u64 , u64 ) > ,
130+ map : HashTable < ( u64 , u64 ) > ,
130131 // Stores indices in chained list data structure
131132 next : Vec < u64 > ,
132133}
133134
134135impl JoinHashMap {
/// Test-only constructor that assembles a `JoinHashMap` directly from an
/// already populated hash table and chained list.
#[cfg(test)]
pub(crate) fn new(map: HashTable<(u64, u64)>, next: Vec<u64>) -> Self {
    JoinHashMap { map, next }
}
139140
140141 pub ( crate ) fn with_capacity ( capacity : usize ) -> Self {
141142 JoinHashMap {
142- map : RawTable :: with_capacity ( capacity) ,
143+ map : HashTable :: with_capacity ( capacity) ,
143144 next : vec ! [ 0 ; capacity] ,
144145 }
145146 }
@@ -199,9 +200,9 @@ pub trait JoinHashMapType {
199200 /// Extend with zero
200201 fn extend_zero ( & mut self , len : usize ) ;
201202 /// Returns mutable references to the hash map and the next.
202- fn get_mut ( & mut self ) -> ( & mut RawTable < ( u64 , u64 ) > , & mut Self :: NextType ) ;
203+ fn get_mut ( & mut self ) -> ( & mut HashTable < ( u64 , u64 ) > , & mut Self :: NextType ) ;
203204 /// Returns a reference to the hash map.
204- fn get_map ( & self ) -> & RawTable < ( u64 , u64 ) > ;
205+ fn get_map ( & self ) -> & HashTable < ( u64 , u64 ) > ;
205206 /// Returns a reference to the next.
206207 fn get_list ( & self ) -> & Self :: NextType ;
207208
@@ -212,24 +213,28 @@ pub trait JoinHashMapType {
212213 deleted_offset : usize ,
213214 ) {
214215 let ( mut_map, mut_list) = self . get_mut ( ) ;
215- for ( row, hash_value) in iter {
216- let item = mut_map. get_mut ( * hash_value, |( hash, _) | * hash_value == * hash) ;
217- if let Some ( ( _, index) ) = item {
218- // Already exists: add index to next array
219- let prev_index = * index;
220- // Store new value inside hashmap
221- * index = ( row + 1 ) as u64 ;
222- // Update chained Vec at `row` with previous value
223- mut_list[ row - deleted_offset] = prev_index;
224- } else {
225- mut_map. insert (
226- * hash_value,
227- // store the value + 1 as 0 value reserved for end of list
228- ( * hash_value, ( row + 1 ) as u64 ) ,
229- |( hash, _) | * hash,
230- ) ;
231- // chained list at `row` is already initialized with 0
232- // meaning end of list
216+ for ( row, & hash_value) in iter {
217+ let entry = mut_map. entry (
218+ hash_value,
219+ |& ( hash, _) | hash_value == hash,
220+ |& ( hash, _) | hash,
221+ ) ;
222+
223+ match entry {
224+ Occupied ( mut occupied_entry) => {
225+ // Already exists: add index to next array
226+ let ( _, index) = occupied_entry. get_mut ( ) ;
227+ let prev_index = * index;
228+ // Store new value inside hashmap
229+ * index = ( row + 1 ) as u64 ;
230+ // Update chained Vec at `row` with previous value
231+ mut_list[ row - deleted_offset] = prev_index;
232+ }
233+ Vacant ( vacant_entry) => {
234+ vacant_entry. insert ( ( hash_value, ( row + 1 ) as u64 ) ) ;
235+ // chained list at `row` is already initialized with 0
236+ // meaning end of list
237+ }
233238 }
234239 }
235240 }
@@ -251,7 +256,7 @@ pub trait JoinHashMapType {
251256 for ( row_idx, hash_value) in iter {
252257 // Get the hash and find it in the index
253258 if let Some ( ( _, index) ) =
254- hash_map. get ( * hash_value, |( hash, _) | * hash_value == * hash)
259+ hash_map. find ( * hash_value, |( hash, _) | * hash_value == * hash)
255260 {
256261 let mut i = * index - 1 ;
257262 loop {
@@ -299,7 +304,7 @@ pub trait JoinHashMapType {
299304
300305 let mut remaining_output = limit;
301306
302- let hash_map: & RawTable < ( u64 , u64 ) > = self . get_map ( ) ;
307+ let hash_map: & HashTable < ( u64 , u64 ) > = self . get_map ( ) ;
303308 let next_chain = self . get_list ( ) ;
304309
305310 // Calculate initial `hash_values` index before iterating
@@ -330,7 +335,7 @@ pub trait JoinHashMapType {
330335 let mut row_idx = to_skip;
331336 for hash_value in & hash_values[ to_skip..] {
332337 if let Some ( ( _, index) ) =
333- hash_map. get ( * hash_value, |( hash, _) | * hash_value == * hash)
338+ hash_map. find ( * hash_value, |( hash, _) | * hash_value == * hash)
334339 {
335340 chain_traverse ! (
336341 input_indices,
@@ -358,12 +363,12 @@ impl JoinHashMapType for JoinHashMap {
358363 fn extend_zero ( & mut self , _: usize ) { }
359364
360365 /// Get mutable references to the hash map and the next.
361- fn get_mut ( & mut self ) -> ( & mut RawTable < ( u64 , u64 ) > , & mut Self :: NextType ) {
366+ fn get_mut ( & mut self ) -> ( & mut HashTable < ( u64 , u64 ) > , & mut Self :: NextType ) {
362367 ( & mut self . map , & mut self . next )
363368 }
364369
365370 /// Get a reference to the hash map.
366- fn get_map ( & self ) -> & RawTable < ( u64 , u64 ) > {
371+ fn get_map ( & self ) -> & HashTable < ( u64 , u64 ) > {
367372 & self . map
368373 }
369374
0 commit comments