|
22 | 22 | use std::fmt::{self, Debug}; |
23 | 23 | use std::ops::Sub; |
24 | 24 |
|
| 25 | +use arrow::array::BooleanArray; |
| 26 | +use arrow::buffer::BooleanBuffer; |
25 | 27 | use arrow::datatypes::ArrowNativeType; |
26 | | -use arrow::util::bit_util; |
27 | 28 | use hashbrown::HashTable; |
28 | 29 | use hashbrown::hash_table::Entry::{Occupied, Vacant}; |
29 | 30 |
|
@@ -125,8 +126,8 @@ pub trait JoinHashMapType: Send + Sync { |
125 | 126 | match_indices: &mut Vec<u64>, |
126 | 127 | ) -> Option<JoinHashMapOffset>; |
127 | 128 |
|
128 | | - /// Sets bits in the provided buffer if the corresponding hash exists in the map. |
129 | | - fn set_bits_if_exists(&self, hash_values: &[u64], buffer: &mut [u8]); |
| 129 | + /// Returns a BooleanArray indicating which of the provided hashes exist in the map. |
| 130 | + fn contain_hashes(&self, hash_values: &[u64]) -> BooleanArray; |
130 | 131 |
|
131 | 132 | /// Returns `true` if the join hash map contains no entries. |
132 | 133 | fn is_empty(&self) -> bool; |
@@ -200,8 +201,8 @@ impl JoinHashMapType for JoinHashMapU32 { |
200 | 201 | ) |
201 | 202 | } |
202 | 203 |
|
203 | | - fn set_bits_if_exists(&self, hash_values: &[u64], buffer: &mut [u8]) { |
204 | | - set_bits_if_exists::<u32>(&self.map, hash_values, buffer); |
| 204 | + fn contain_hashes(&self, hash_values: &[u64]) -> BooleanArray { |
| 205 | + contain_hashes(&self.map, hash_values) |
205 | 206 | } |
206 | 207 |
|
207 | 208 | fn is_empty(&self) -> bool { |
@@ -278,8 +279,8 @@ impl JoinHashMapType for JoinHashMapU64 { |
278 | 279 | ) |
279 | 280 | } |
280 | 281 |
|
281 | | - fn set_bits_if_exists(&self, hash_values: &[u64], buffer: &mut [u8]) { |
282 | | - set_bits_if_exists::<u64>(&self.map, hash_values, buffer); |
| 282 | + fn contain_hashes(&self, hash_values: &[u64]) -> BooleanArray { |
| 283 | + contain_hashes(&self.map, hash_values) |
283 | 284 | } |
284 | 285 |
|
285 | 286 | fn is_empty(&self) -> bool { |
@@ -509,44 +510,33 @@ where |
509 | 510 | None |
510 | 511 | } |
511 | 512 |
|
512 | | -pub fn set_bits_if_exists<T>( |
513 | | - map: &HashTable<(u64, T)>, |
514 | | - hash_values: &[u64], |
515 | | - buffer: &mut [u8], |
516 | | -) { |
517 | | - for (i, &hash) in hash_values.iter().enumerate() { |
518 | | - if map.find(hash, |(h, _)| hash == *h).is_some() { |
519 | | - bit_util::set_bit(buffer, i); |
520 | | - } |
521 | | - } |
| 513 | +pub fn contain_hashes<T>(map: &HashTable<(u64, T)>, hash_values: &[u64]) -> BooleanArray { |
| 514 | + let buffer = BooleanBuffer::collect_bool(hash_values.len(), |i| { |
| 515 | + let hash = hash_values[i]; |
| 516 | + map.find(hash, |(h, _)| hash == *h).is_some() |
| 517 | + }); |
| 518 | + BooleanArray::new(buffer, None) |
522 | 519 | } |
523 | 520 |
|
524 | 521 | #[cfg(test)] |
525 | 522 | mod tests { |
526 | 523 | use super::*; |
527 | 524 |
|
528 | 525 | #[test] |
529 | | - fn test_set_bits_if_exists() { |
| 526 | + fn test_contain_hashes() { |
530 | 527 | let mut hash_map = JoinHashMapU32::with_capacity(10); |
531 | | - // Build side: insert 10, 20, 30 |
532 | 528 | hash_map.update_from_iter(Box::new([10u64, 20u64, 30u64].iter().enumerate()), 0); |
533 | 529 |
|
534 | | - // Probe side: test both existing and non-existing hashes |
535 | 530 | let probe_hashes = vec![10, 11, 20, 21, 30, 31]; |
536 | | - let mut buffer = vec![0u8; 1]; |
537 | | - hash_map.set_bits_if_exists(&probe_hashes, &mut buffer); |
| 531 | + let array = hash_map.contain_hashes(&probe_hashes); |
| 532 | + |
| 533 | + assert_eq!(array.len(), probe_hashes.len()); |
538 | 534 |
|
539 | 535 | for (i, &hash) in probe_hashes.iter().enumerate() { |
540 | 536 | if matches!(hash, 10 | 20 | 30) { |
541 | | - assert!( |
542 | | - bit_util::get_bit(&buffer, i), |
543 | | - "Hash {hash} should exist in the map" |
544 | | - ); |
| 537 | + assert!(array.value(i), "Hash {hash} should exist in the map"); |
545 | 538 | } else { |
546 | | - assert!( |
547 | | - !bit_util::get_bit(&buffer, i), |
548 | | - "Hash {hash} should NOT exist in the map" |
549 | | - ); |
| 539 | + assert!(!array.value(i), "Hash {hash} should NOT exist in the map"); |
550 | 540 | } |
551 | 541 | } |
552 | 542 | } |
|
0 commit comments