Skip to content

Commit e755677

Browse files
refactor(in_list): use HashTable instead of HashMap in ArrayStaticFilter
HashTable is better suited to storing values with custom hashing and equality: it avoids the overhead of key-value pairs (the map only ever stored `()` values) and simplifies the lookup and insertion code.
1 parent d2be0cc commit e755677

File tree

1 file changed

+14
-19
lines changed

1 file changed

+14
-19
lines changed

datafusion/physical-expr/src/expressions/in_list/array_filter.rs

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,11 @@ use arrow::buffer::{BooleanBuffer, NullBuffer};
2222
use arrow::compute::{SortOptions, take};
2323
use arrow::datatypes::DataType;
2424
use arrow::util::bit_iterator::BitIndexIterator;
25+
use datafusion_common::Result;
2526
use datafusion_common::hash_utils::with_hashes;
26-
use datafusion_common::{HashMap, Result};
2727

2828
use ahash::RandomState;
29-
use hashbrown::hash_map::RawEntryMut;
29+
use hashbrown::HashTable;
3030

3131
/// Trait for InList static filters
3232
pub(crate) trait StaticFilter {
@@ -41,11 +41,11 @@ pub(crate) trait StaticFilter {
4141
pub(crate) struct ArrayStaticFilter {
4242
in_array: ArrayRef,
4343
state: RandomState,
44-
/// Used to provide a lookup from value to in list index
44+
/// Stores indices into `in_array` for O(1) lookups.
4545
///
46-
/// Note: usize::hash is not used, instead the raw entry
47-
/// API is used to store entries w.r.t their value
48-
map: HashMap<usize, (), ()>,
46+
/// Uses pre-computed hashes and custom equality based on array values
47+
/// rather than the indices themselves.
48+
table: HashTable<usize>,
4949
}
5050

5151
impl StaticFilter for ArrayStaticFilter {
@@ -89,11 +89,8 @@ impl StaticFilter for ArrayStaticFilter {
8989
}
9090

9191
let hash = hashes[i];
92-
let contains = self
93-
.map
94-
.raw_entry()
95-
.from_hash(hash, |idx| cmp(i, *idx).is_eq())
96-
.is_some();
92+
let contains =
93+
self.table.find(hash, |idx| cmp(i, *idx).is_eq()).is_some();
9794

9895
match contains {
9996
true => Some(!negated),
@@ -119,23 +116,21 @@ impl ArrayStaticFilter {
119116
return Ok(ArrayStaticFilter {
120117
in_array,
121118
state: RandomState::new(),
122-
map: HashMap::with_hasher(()),
119+
table: HashTable::new(),
123120
});
124121
}
125122

126123
let state = RandomState::new();
127-
let mut map: HashMap<usize, (), ()> = HashMap::with_hasher(());
124+
let mut table = HashTable::new();
128125

129126
with_hashes([&in_array], &state, |hashes| -> Result<()> {
130127
let cmp = make_comparator(&in_array, &in_array, SortOptions::default())?;
131128

132129
let insert_value = |idx| {
133130
let hash = hashes[idx];
134-
if let RawEntryMut::Vacant(v) = map
135-
.raw_entry_mut()
136-
.from_hash(hash, |x| cmp(*x, idx).is_eq())
137-
{
138-
v.insert_with_hasher(hash, idx, (), |x| hashes[*x]);
131+
// Only insert if not already present
132+
if table.find(hash, |x| cmp(*x, idx).is_eq()).is_none() {
133+
table.insert_unique(hash, idx, |x| hashes[*x]);
139134
}
140135
};
141136

@@ -153,7 +148,7 @@ impl ArrayStaticFilter {
153148
Ok(Self {
154149
in_array,
155150
state,
156-
map,
151+
table,
157152
})
158153
}
159154
}

0 commit comments

Comments
 (0)