Skip to content

Commit 9c31eec

Browse files
authored
fix BitInteger hashing regression (#59707)
We cannot avoid the slowness of the BitInteger `>>>` method, but we can vectorize this `getindex`, which is the reason `load_le_array` exists in the first place. This appears to give roughly equivalent performance on random numbers and up to 10x faster performance in other cases. Closes #59702.
1 parent bb3be0d commit 9c31eec

File tree

1 file changed

+9
-23
lines changed

1 file changed

+9
-23
lines changed

base/hashing.jl

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -72,36 +72,22 @@ hash(x::Union{Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32}, h::UInt) = hash(
7272

7373
# IntegerCodeUnits provides a little-endian byte representation of integers
7474
struct IntegerCodeUnits{T<:Integer} <: AbstractVector{UInt8}
75-
value::T
75+
uvalue::T
7676
num_bytes::Int
7777

7878
function IntegerCodeUnits(x::T) where {T<:Integer}
7979
# Calculate number of bytes needed (always pad to full byte)
8080
u = abs(x)
8181
num_bytes = max(cld(top_set_bit(u), 8), 1)
82-
return new{T}(x, num_bytes)
82+
return new{T}(u, num_bytes)
8383
end
8484
end
85+
size(units::IntegerCodeUnits) = (units.num_bytes,)
86+
length(units::IntegerCodeUnits) = units.num_bytes
87+
@inline getindex(units::IntegerCodeUnits, i::Int) = (units.uvalue >>> (8 * (i - 1))) % UInt8
88+
@inline load_le_array(::Type{UInt64}, units::IntegerCodeUnits, idx) = (units.uvalue >>> (8 * (idx - 1))) % UInt64
89+
@inline load_le_array(::Type{UInt32}, units::IntegerCodeUnits, idx) = (units.uvalue >>> (8 * (idx - 1))) % UInt32
8590

86-
function Base.size(units::IntegerCodeUnits)
87-
return (units.num_bytes,)
88-
end
89-
90-
function Base.length(units::IntegerCodeUnits)
91-
return units.num_bytes
92-
end
93-
94-
function Base.getindex(units::IntegerCodeUnits, i::Int)
95-
@boundscheck checkbounds(units, i)
96-
u = abs(units.value)
97-
byte_pos = i - 1
98-
return UInt8((u >>> (8 * byte_pos)) & 0xff)
99-
end
100-
101-
function Base.iterate(units::IntegerCodeUnits, state::Int = 1)
102-
state > units.num_bytes && return nothing
103-
return units[state], state + 1
104-
end
10591

10692
# Main interface function to get little-endian byte representation of integers
10793
codeunits(x::Integer) = IntegerCodeUnits(x)
@@ -114,7 +100,7 @@ end
114100
utf8units(s::AbstractString) = codeunit(s) <: UInt8 ? codeunits(s) : UTF8Units(s)
115101

116102
# Iterator state: (char_iter_state, remaining_utf8_bytes)
117-
function Base.iterate(units::UTF8Units)
103+
function iterate(units::UTF8Units)
118104
char_result = iterate(units.string)
119105
char_result === nothing && return nothing
120106
char, char_state = char_result
@@ -128,7 +114,7 @@ function Base.iterate(units::UTF8Units)
128114
return first_byte, (char_state, remaining_bytes)
129115
end
130116

131-
function Base.iterate(units::UTF8Units, state)
117+
function iterate(units::UTF8Units, state)
132118
char_state, remaining_bytes = state
133119
# If we have more bytes from current char, return next byte
134120
if remaining_bytes != 0

0 commit comments

Comments
 (0)