Skip to content

Commit 494851a

Browse files
authored
Feature: Improve hash Expr performance (#16977)
* Avoid hashing the signature in UDAF/UDF/UDWF
* Use ahash::Hasher instead of DefaultHasher
* Use ahash::RandomState instead of std::hash::RandomState
* Add ahash dependency to other crates
* Format toml files
* Only hash the type_id in UDFs, UDWFs and UDAFs by default
* Revert "Add ahash dependency to other crates". This reverts commit d3030c6.
* Revert "Use ahash::RandomState instead of std::hash::RandomState". This reverts commit fc7c5c7.
* Revert "Use ahash::Hasher instead of DefaultHasher". This reverts commit dc140c1.
* Remove FFI ahash dependency
1 parent 6ea01d1 commit 494851a

File tree

3 files changed

+9
-12
lines changed

3 files changed

+9
-12
lines changed

datafusion/expr/src/udaf.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -942,13 +942,12 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
942942
/// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs,
943943
/// their `hash_value`s must be the same.
944944
///
945-
/// By default, it is consistent with default implementation of [`Self::equals`].
945+
/// By default, it only hashes the type. The other fields are not hashed, as usually the
946+
/// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state
947+
/// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`].
946948
fn hash_value(&self) -> u64 {
947949
let hasher = &mut DefaultHasher::new();
948950
self.as_any().type_id().hash(hasher);
949-
self.name().hash(hasher);
950-
self.aliases().hash(hasher);
951-
self.signature().hash(hasher);
952951
hasher.finish()
953952
}
954953

datafusion/expr/src/udf.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -720,13 +720,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
720720
/// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs,
721721
/// their `hash_value`s must be the same.
722722
///
723-
/// By default, it is consistent with default implementation of [`Self::equals`].
723+
/// By default, it only hashes the type. The other fields are not hashed, as usually the
724+
/// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state
725+
/// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`].
724726
fn hash_value(&self) -> u64 {
725727
let hasher = &mut DefaultHasher::new();
726728
self.as_any().type_id().hash(hasher);
727-
self.name().hash(hasher);
728-
self.aliases().hash(hasher);
729-
self.signature().hash(hasher);
730729
hasher.finish()
731730
}
732731

datafusion/expr/src/udwf.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -385,13 +385,12 @@ pub trait WindowUDFImpl: Debug + Send + Sync {
385385
/// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs,
386386
/// their `hash_value`s must be the same.
387387
///
388-
/// By default, it is consistent with default implementation of [`Self::equals`].
388+
/// By default, it only hashes the type. The other fields are not hashed, as usually the
389+
/// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state
390+
/// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`].
389391
fn hash_value(&self) -> u64 {
390392
let hasher = &mut DefaultHasher::new();
391393
self.as_any().type_id().hash(hasher);
392-
self.name().hash(hasher);
393-
self.aliases().hash(hasher);
394-
self.signature().hash(hasher);
395394
hasher.finish()
396395
}
397396

0 commit comments

Comments
 (0)