Skip to content

Commit a6fd5cc

Browse files
authored
Upgrade hashbrown to 0.16 (#19554)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #13433 ## Rationale for this change Keep up to date with hashbrown and possible (performance) improvements, remove the `rawtable` functionality. ## What changes are included in this PR? Upgrading, fixing case of nondeterminism in `HashSet` usage. ## Are these changes tested? Yes existiing tests. ## Are there any user-facing changes? No.
1 parent 13f3843 commit a6fd5cc

File tree

9 files changed

+36
-26
lines changed

9 files changed

+36
-26
lines changed

Cargo.lock

Lines changed: 17 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ flate2 = "1.1.5"
156156
futures = "0.3"
157157
glob = "0.3.0"
158158
half = { version = "2.7.0", default-features = false }
159-
hashbrown = { version = "0.14.5", features = ["raw"] }
159+
hashbrown = { version = "0.16.1" }
160160
hex = { version = "0.4.3" }
161161
indexmap = "2.12.1"
162162
insta = { version = "1.45.0", features = ["glob", "filters"] }

datafusion/common/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ pub use functional_dependencies::{
8585
aggregate_functional_dependencies, get_required_group_by_exprs_indices,
8686
get_target_functional_dependencies,
8787
};
88-
use hashbrown::hash_map::DefaultHashBuilder;
88+
use hashbrown::DefaultHashBuilder;
8989
pub use join_type::{JoinConstraint, JoinSide, JoinType};
9090
pub use nested_struct::cast_column;
9191
pub use null_equality::NullEquality;

datafusion/functions-aggregate/src/array_agg.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1111,7 +1111,7 @@ mod tests {
11111111
])])?;
11121112

11131113
// without compaction, the size is 17112
1114-
assert_eq!(acc.size(), 2184);
1114+
assert_eq!(acc.size(), 2224);
11151115

11161116
Ok(())
11171117
}

datafusion/physical-expr-common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,6 @@ chrono = { workspace = true }
4747
datafusion-common = { workspace = true }
4848
datafusion-expr-common = { workspace = true }
4949
hashbrown = { workspace = true }
50+
indexmap = { workspace = true }
5051
itertools = { workspace = true }
5152
parking_lot = { workspace = true }

datafusion/physical-expr-common/src/sort_expr.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use arrow::datatypes::Schema;
3131
use arrow::record_batch::RecordBatch;
3232
use datafusion_common::{HashSet, Result};
3333
use datafusion_expr_common::columnar_value::ColumnarValue;
34-
34+
use indexmap::IndexSet;
3535
/// Represents Sort operation for a column in a RecordBatch
3636
///
3737
/// Example:
@@ -353,14 +353,14 @@ impl From<PhysicalSortRequirement> for PhysicalSortExpr {
353353
/// 1. It is non-degenerate, meaning it contains at least one element.
354354
/// 2. It is duplicate-free, meaning it does not contain multiple entries for
355355
/// the same column.
356-
#[derive(Debug, Clone)]
356+
#[derive(Clone, Debug)]
357357
pub struct LexOrdering {
358358
/// Vector of sort expressions representing the lexicographical ordering.
359359
exprs: Vec<PhysicalSortExpr>,
360360
/// Set of expressions in the lexicographical ordering, used to ensure
361361
/// that the ordering is duplicate-free. Note that the elements in this
362362
/// set are the same underlying physical expressions as in `exprs`.
363-
set: HashSet<Arc<dyn PhysicalExpr>>,
363+
set: IndexSet<Arc<dyn PhysicalExpr>>,
364364
}
365365

366366
impl LexOrdering {
@@ -371,7 +371,7 @@ impl LexOrdering {
371371
let mut candidate = Self {
372372
// not valid yet; valid publicly-returned instance must be non-empty
373373
exprs: Vec::new(),
374-
set: HashSet::new(),
374+
set: IndexSet::new(),
375375
};
376376
for expr in exprs {
377377
candidate.push(expr);
@@ -421,7 +421,7 @@ impl LexOrdering {
421421
return false;
422422
}
423423
for PhysicalSortExpr { expr, .. } in self.exprs[len..].iter() {
424-
self.set.remove(expr);
424+
self.set.swap_remove(expr);
425425
}
426426
self.exprs.truncate(len);
427427
true

datafusion/physical-expr/src/equivalence/class.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use crate::projection::ProjectionTargets;
2727
use crate::{PhysicalExpr, PhysicalExprRef, PhysicalSortExpr, PhysicalSortRequirement};
2828

2929
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
30-
use datafusion_common::{HashMap, JoinType, Result, ScalarValue};
30+
use datafusion_common::{JoinType, Result, ScalarValue};
3131
use datafusion_physical_expr_common::physical_expr::format_physical_expr_list;
3232

3333
use indexmap::{IndexMap, IndexSet};
@@ -303,7 +303,7 @@ type AugmentedMapping<'a> = IndexMap<
303303
#[derive(Clone, Debug, Default)]
304304
pub struct EquivalenceGroup {
305305
/// A mapping from expressions to their equivalence class key.
306-
map: HashMap<Arc<dyn PhysicalExpr>, usize>,
306+
map: IndexMap<Arc<dyn PhysicalExpr>, usize>,
307307
/// The equivalence classes in this group.
308308
classes: Vec<EquivalenceClass>,
309309
}
@@ -436,7 +436,7 @@ impl EquivalenceGroup {
436436
let cls = self.classes.swap_remove(idx);
437437
// Remove its entries from the lookup table:
438438
for expr in cls.iter() {
439-
self.map.remove(expr);
439+
self.map.swap_remove(expr);
440440
}
441441
// Update the lookup table for the moved class:
442442
if idx < self.classes.len() {
@@ -448,7 +448,7 @@ impl EquivalenceGroup {
448448
/// Updates the entry in lookup table for the given equivalence class with
449449
/// the given index.
450450
fn update_lookup_table(
451-
map: &mut HashMap<Arc<dyn PhysicalExpr>, usize>,
451+
map: &mut IndexMap<Arc<dyn PhysicalExpr>, usize>,
452452
cls: &EquivalenceClass,
453453
idx: usize,
454454
) {

datafusion/physical-expr/src/expressions/case.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ use arrow::datatypes::{DataType, Schema, UInt32Type, UnionMode};
2929
use arrow::error::ArrowError;
3030
use datafusion_common::cast::as_boolean_array;
3131
use datafusion_common::{
32-
DataFusionError, HashMap, HashSet, Result, ScalarValue, assert_or_internal_err,
33-
exec_err, internal_datafusion_err, internal_err,
32+
DataFusionError, Result, ScalarValue, assert_or_internal_err, exec_err,
33+
internal_datafusion_err, internal_err,
3434
};
3535
use datafusion_expr::ColumnarValue;
36+
use indexmap::{IndexMap, IndexSet};
3637
use std::borrow::Cow;
3738
use std::hash::Hash;
3839
use std::{any::Any, sync::Arc};
@@ -122,7 +123,7 @@ impl CaseBody {
122123
/// Derives a [ProjectedCaseBody] from this [CaseBody].
123124
fn project(&self) -> Result<ProjectedCaseBody> {
124125
// Determine the set of columns that are used in all the expressions of the case body.
125-
let mut used_column_indices = HashSet::<usize>::new();
126+
let mut used_column_indices = IndexSet::<usize>::new();
126127
let mut collect_column_indices = |expr: &Arc<dyn PhysicalExpr>| {
127128
expr.apply(|expr| {
128129
if let Some(column) = expr.as_any().downcast_ref::<Column>() {
@@ -149,7 +150,7 @@ impl CaseBody {
149150
.iter()
150151
.enumerate()
151152
.map(|(projected, original)| (*original, projected))
152-
.collect::<HashMap<usize, usize>>();
153+
.collect::<IndexMap<usize, usize>>();
153154

154155
// Construct the projected body by rewriting each expression from the original body
155156
// using the column index mapping.

datafusion/physical-expr/src/utils/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ pub fn collect_columns(expr: &Arc<dyn PhysicalExpr>) -> HashSet<Column> {
229229
let mut columns = HashSet::<Column>::new();
230230
expr.apply(|expr| {
231231
if let Some(column) = expr.as_any().downcast_ref::<Column>() {
232-
columns.get_or_insert_owned(column);
232+
columns.get_or_insert_with(column, |c| c.clone());
233233
}
234234
Ok(TreeNodeRecursion::Continue)
235235
})

0 commit comments

Comments
 (0)