Skip to content

Commit eceaa39

Browse files
authored
optimize hash implementations (#559)
This optimizes the `hash` implementations added in #452. Creating tuples whose element types are not statically known, and which may hold arbitrary values at runtime, should be avoided for performance reasons. As a result, the allocation regression reported in #558 has been reduced to its previous level, although the time regression from the `hash` calculation itself still remains.
1 parent 5accfef commit eceaa39

File tree

3 files changed

+32
-6
lines changed

3 files changed

+32
-6
lines changed

src/green_tree.jl

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,17 @@ end
7272

7373
Base.summary(node::GreenNode) = summary(node.head)
7474

75-
Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.children), h)
75+
function Base.hash(node::GreenNode, h::UInt)
76+
children = node.children
77+
if children === nothing
78+
h = hash(nothing, h)
79+
else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)`
80+
for child in children
81+
h = hash(child, h)
82+
end
83+
end
84+
hash(node.head, hash(node.span, h))
85+
end
7686
function Base.:(==)(n1::GreenNode, n2::GreenNode)
7787
n1.head == n2.head && n1.span == n2.span && n1.children == n2.children
7888
end
@@ -129,4 +139,3 @@ function build_tree(::Type{GreenNode}, stream::ParseStream; kws...)
129139
GreenNode(h, span, collect(GreenNode{SyntaxHead}, cs))
130140
end
131141
end
132-

src/source_files.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ struct SourceFile
143143
line_starts::Vector{Int}
144144
end
145145

146-
Base.hash(s::SourceFile, h::UInt) = hash((s.code, s.byte_offset, s.filename, s.first_line, s.line_starts), h)
146+
Base.hash(s::SourceFile, h::UInt) =
147+
hash(s.code, hash(s.byte_offset, hash(s.filename, hash(s.first_line, hash(s.line_starts, h)))))
147148
function Base.:(==)(a::SourceFile, b::SourceFile)
148149
a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename &&
149150
a.first_line == b.first_line && a.line_starts == b.line_starts

src/syntax_tree.jl

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,18 @@ mutable struct TreeNode{NodeData} # ? prevent others from using this with Node
1818
end
1919

2020
# Exclude parent from hash and equality checks. This means that subtrees can compare equal.
21-
Base.hash(node::TreeNode, h::UInt) = hash((node.children, node.data), h)
21+
function Base.hash(node::TreeNode, h::UInt)
22+
h = hash(node.data, h)
23+
children = node.children
24+
if children === nothing
25+
return hash(nothing, h)
26+
else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)`
27+
for child in children
28+
h = hash(child, h)
29+
end
30+
return h
31+
end
32+
end
2233
function Base.:(==)(a::TreeNode{T}, b::TreeNode{T}) where T
2334
a.children == b.children && a.data == b.data
2435
end
@@ -50,9 +61,14 @@ struct SyntaxData <: AbstractSyntaxData
5061
val::Any
5162
end
5263

53-
Base.hash(data::SyntaxData, h::UInt) = hash((data.source, data.raw, data.position, data.val), h)
64+
Base.hash(data::SyntaxData, h::UInt) =
65+
hash(data.source, hash(data.raw, hash(data.position,
66+
# Avoid dynamic dispatch:
67+
# This does not support custom `hash` implementations that may be defined for `typeof(data.val)`.
68+
# However, such custom user types should not generally appear in the AST.
69+
Core.invoke(hash, Tuple{Any,UInt}, data.val, h))))
5470
function Base.:(==)(a::SyntaxData, b::SyntaxData)
55-
a.source == b.source && a.raw == b.raw && a.position == b.position && a.val == b.val
71+
a.source == b.source && a.raw == b.raw && a.position == b.position && a.val === b.val
5672
end
5773

5874
"""

0 commit comments

Comments
 (0)