Skip to content

Commit d29a684

Browse files
committed
Graph utils: unalias_nodes, prune
1 parent 9cd4df4 commit d29a684

File tree

2 files changed

+311
-1
lines changed

2 files changed

+311
-1
lines changed

src/syntax_graph.jl

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,3 +823,140 @@ end
823823
# out
824824
# end
825825

826+
#-------------------------------------------------------------------------------
827+
# Data structure utilities
828+
829+
"""
    unalias_nodes(st::SyntaxTree)

Produce a tree equivalent to `st` in which every descendant of `st` is reachable
through exactly one parent. Only the underlying representation differs from
`st`: wherever a subtree had more than one parent, each additional parent
receives its own copy of that subtree. Beyond those copies, `unalias_nodes`
should not allocate new nodes unnecessarily.

    unalias_nodes(sl::SyntaxList)

For a `SyntaxList`, the resulting trees share no nodes with one another, and no
tree shares nodes internally. An entry that appears more than once yields a
copied tree.
"""
function unalias_nodes(st::SyntaxTree)
    graph = syntax_graph(st)
    # Start with empty bookkeeping: no node and no edge slot has been claimed.
    visited_nodes = Set{NodeId}()
    visited_edges = Set{Int}()
    root = _unalias_nodes(graph, st._id, visited_nodes, visited_edges)
    return SyntaxTree(graph, root)
end
845+
846+
function unalias_nodes(sl::SyntaxList)
    graph = syntax_graph(sl)
    # A single pair of bookkeeping sets is threaded through every entry, so
    # nodes shared between entries (or repeated entries) are detected too.
    visited_nodes = Set{NodeId}()
    visited_edges = Set{Int}()
    new_ids = map(sl.ids) do id
        _unalias_nodes(graph, id, visited_nodes, visited_edges)
    end
    return SyntaxList(graph, new_ids)
end
852+
853+
# Recursive worker behind `unalias_nodes`. Returns the id to use in place of
# `id`: `id` itself on first visit, or the id of a copy made with `copy_ast` if
# `id` was already visited.
#
# `seen` collects the node ids visited so far and `seen_edges` collects the
# indices into `graph.edges` already claimed by a visited node. Both sets, as
# well as `graph.edges` and `graph.edge_ranges`, are mutated in place.
#
# Note that `seen_edges` is only needed for when edge ranges overlap, which is a
# situation we don't produce yet.
function _unalias_nodes(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    if id in seen
        # Already reached through another parent: continue with a copy instead.
        id = copy_ast(graph, SyntaxTree(graph, id); copy_source=false)._id
    end
    # `isdisjoint` answers the overlap question without materializing the
    # intersection.
    if !isdisjoint(seen_edges, graph.edge_ranges[id])
        # someone is referencing our edges; run away so we can modify them
        next_edge = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = next_edge:lastindex(graph.edges)
    end
    union!(seen_edges, graph.edge_ranges[id])
    push!(seen, id)

    for (c, i) in zip(children(graph, id), graph.edge_ranges[id])
        c2 = _unalias_nodes(graph, c, seen, seen_edges)
        # the new child should be the same in every way to the old one, so
        # modify the edge instead of triggering copies with `mapchildren`
        c !== c2 && (graph.edges[i] = c2)
    end
    return id
end
876+
877+
"""
    prune(st::SyntaxTree; keep=flattened_provenance(st))

Return a tree whose graph no longer contains unreachable nodes (nodes that are
not descendents of `st`), and whose provenance data has been minimized.

When `keep` is not `nothing`, its descendents are treated as reachable too. The
default for `keep` is the final node(s) in the provenance chain of `st`, so by
default expression provenance back to the original parsed nodes survives while
lowering-internal provenance does not. In any case, byte (or, with old macros,
`LineNumberNode`) provenance is retained.

Provenance shrinkage: green trees are omitted from `SourceRef`s. If node A
references node B as its source and B is unreachable, A adopts the source of B.
"""
function prune(st::SyntaxTree; keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    # `st` goes first so that it is the first entry of the pruned result.
    roots = NodeId[st._id]
    if keep isa SyntaxTree
        push!(roots, keep._id)
    elseif keep isa SyntaxList
        append!(roots, keep.ids)
    end
    pruned = prune(syntax_graph(st), unique(roots))
    return pruned[1]
end
896+
897+
# Build a new graph holding only the nodes reachable from `entrypoints_a` in
# `graph1_a`, and return a `SyntaxList` over that new graph with one entry per
# entrypoint, in the given order. `entrypoints_a` must not contain duplicates.
#
# This implementation unaliases nodes, which undoes a small amount of space
# savings from the DAG representation, but it allows us to (1) omit the whole
# `edges` array (TODO), and (2) make the pruning algorithm simpler. The
# invariant we win is having `edge_ranges` be one or more interleaved
# level-order traversals where every node's set of children is contiguous, so
# its entries can refer to itself instead of an external `edges` vector.
function prune(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    @assert allunique(entrypoints_a)
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    (graph1, entrypoints) = (unaliased.graph, unaliased.ids)
    # `collect` makes a fresh vector, so growing `nodes1` below leaves
    # `entrypoints` (whose length is needed at the end) untouched.
    nodes1 = collect(NodeId, entrypoints) # Reachable subset of graph1
    map12 = Dict{NodeId, Int}()           # graph1 => graph2 mapping
    graph2 = ensure_attributes!(SyntaxGraph(); attrdefs(graph1)...)
    # `nodes1` doubles as the work queue: position `n2` in it is the id of the
    # node in `graph2`, and a node's children are appended as one contiguous
    # run, which is exactly the range recorded for it in `graph2.edge_ranges`.
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        map12[n1] = n2
        push!(graph2.edge_ranges, is_leaf(graph1, n1) ?
            (0:-1) : (1:numchildren(graph1, n1)) .+ length(nodes1))
        append!(nodes1, children(graph1, n1))
    end
    graph2.edges = 1:length(nodes1) # our reward for unaliasing

    # Copy every attribute except `:source`, which is rewritten separately.
    for attr in attrnames(graph1)
        attr === :source && continue
        for (n2, n1) in enumerate(nodes1)
            attrval = get(graph1.attributes[attr], n1, nothing)
            isnothing(attrval) || (graph2.attributes[attr][n2] = attrval)
        end
    end

    # Prune provenance. Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    # Compute (and memoize) the `graph2` source for the `graph1` node `id1`.
    function get_resolved!(id1::NodeId)
        res = get(resolved_sources, id1, nothing)
        if isnothing(res)
            src1 = graph1.source[id1]
            res = if haskey(map12, src1)
                # The source is a node that survived pruning: point at its new id.
                map12[src1]
            elseif src1 isa NodeId
                # The source node was pruned: adopt that node's own source.
                get_resolved!(src1)
            elseif src1 isa Tuple
                # NOTE(review): each element is replaced by its resolved source
                # without first checking `map12` -- confirm this is intended.
                map(get_resolved!, src1)
            elseif src1 isa SourceRef
                # Rebuild the reference with `nothing` as its last field.
                SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
            else
                src1
            end
            resolved_sources[id1] = res
        end
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end

    # The first n entries in nodes1 were our entrypoints, unique from unaliasing
    return SyntaxList(graph2, 1:length(entrypoints))
end

test/syntax_graph.jl

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@ end
6363
function testgraph(edge_ranges, edges, more_attrs...)
6464
kinds = Dict(map(i->(i=>K"block"), eachindex(edge_ranges)))
6565
sources = Dict(map(i->(i=>LineNumberNode(i)), eachindex(edge_ranges)))
66+
orig = Dict(map(i->(i=>i), eachindex(edge_ranges)))
6667
SyntaxGraph(
6768
edge_ranges,
6869
edges,
69-
Dict(:kind => kinds, :source => sources, more_attrs...))
70+
Dict(:kind => kinds, :source => sources,
71+
:orig => orig, more_attrs...))
7072
end
7173

7274
@testset "copy_ast" begin
@@ -107,4 +109,175 @@ end
107109
# Disallow for now, since we can't prevent dangling sourcerefs
108110
@test_throws ErrorException JuliaLowering.copy_ast(new_g, st; copy_source=false)
109111
end
112+
113+
@testset "unalias_nodes" begin
    # Node 4 has two parents (2 and 3), each through its own edge slot.
    # 1 -+-> 2 ->-> 4
    #    |      |
    #    +-> 3 -+
    g = testgraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 5
    @test length(stu._graph.edges) == 4
    # Properties of node 4 should be preserved
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test st[1][1].source == stu[1][1].source == stu[2][1].source
    @test stu[1][1]._id != stu[2][1]._id

    # Try again with overlapping edge_ranges
    g = testgraph([1:2, 3:3, 3:3, 0:-1], [2, 3, 4])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 5
    @test length(stu._graph.edges) == 4
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test st[1][1].source == stu[1][1].source == stu[2][1].source
    @test stu[1][1]._id != stu[2][1]._id

    #           +-> 5
    #           |
    # 1 -+-> 2 -+---->>>-> 6
    #    |           |||
    #    +-> 3 -> 7 -+||
    #    |            ||
    #    +-> 4 -+-----+|
    #           |      |
    #           +------+
    g = testgraph([1:3, 4:5, 6:6, 7:8, 0:-1, 0:-1, 9:9],
                  [2, 3, 4, 5, 6, 7, 6, 6, 6])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    # node 6 should be copied three times
    @test length(stu._graph.edge_ranges) == 10
    @test length(stu._graph.edges) == 9
    # the four copies of node 6 should have attrs identical to the original and distinct ids
    @test 6 == stu[1][2].orig == stu[2][1][1].orig == stu[3][1].orig == stu[3][2].orig
    # `allunique` checks every pair; a chained `!=` only compares neighbours
    @test allunique([stu[1][2]._id, stu[2][1][1]._id, stu[3][1]._id, stu[3][2]._id])

    # 1 -+-> 2 ->-> 4 -+----> 5 ->-> 7
    #    |      |      |         |
    #    +-> 3 -+      +-->-> 6 -+
    #        |            |
    #        +------------+
    g = testgraph([1:2, 3:3, 4:5, 6:7, 8:8, 9:9, 0:-1],
                  [2,3,4,4,6,5,6,7,7])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 15
    @test length(stu._graph.edges) == 14
    # attrs of nodes 4-7
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test 5 == stu[1][1][1].orig == stu[2][1][1].orig
    @test 6 == stu[1][1][2].orig == stu[2][1][2].orig == stu[2][2].orig
    @test 7 == stu[1][1][1][1].orig == stu[1][1][2][1].orig ==
        stu[2][1][1][1].orig == stu[2][1][2][1].orig == stu[2][2][1].orig
    # ensure no duplication
    @test allunique([stu[1][1][1][1]._id, stu[1][1][2][1]._id,
                     stu[2][1][1][1]._id, stu[2][1][2][1]._id, stu[2][2][1]._id])
end
182+
183+
@testset "prune" begin
    # Bracketed nodes in the diagrams mark the tree passed to `prune`.
    # [1]-+-> 2       5 --> 6
    #     |
    #     +-> 3 --> 4       7
    g = testgraph([1:2, 0:-1, 3:3, 0:-1, 4:4, 0:-1, 0:-1], [2, 3, 4, 6])
    st = SyntaxTree(g, 1)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 4
    @test stp.source == LineNumberNode(1)
    @test stp[1].source == LineNumberNode(2)
    @test stp[2].source == LineNumberNode(3)
    @test stp[2][1].source == LineNumberNode(4)

    # (also checks that the last prune didn't destroy the graph)
    #  1 -+-> 2       5 --> 6
    #     |
    #     +-> 3 --> 4      [7]
    st = SyntaxTree(g, 7)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 1
    @test stp.orig == 7

    # 1 -+->[2]->-> 4
    #    |      |
    #    +-> 3 -+
    g = testgraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4])
    st = SyntaxTree(g, 2)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 2
    @test stp.orig == 2
    @test stp[1].orig == 4

    #  9 -->[1]--> 5      src(1) = 2
    # 10 --> 2 --> 6      src(2) = 3
    # 11 --> 3 --> 7      src(3) = 4
    # 12 --> 4 --> 8      else src(i) = line(i)
    g = testgraph([1:1, 2:2, 3:3, 4:4, 0:-1, 0:-1, 0:-1, 0:-1, 5:5, 6:6, 7:7, 8:8],
                  [5, 6, 7, 8, 1, 2, 3, 4],
                  :source => Dict(
                      1=>2, 2=>3, 3=>4,
                      map(i->(i=>LineNumberNode(i)), 4:12)...))
    st = SyntaxTree(g, 1)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    # 1, 5, 4, 8 should remain
    @test length(syntax_graph(stp).edge_ranges) === 4
    @test stp.source isa NodeId
    orig_4 = SyntaxTree(syntax_graph(stp), stp.source)
    @test orig_4.source === LineNumberNode(4)
    @test numchildren(orig_4) === 1
    @test orig_4[1].source === LineNumberNode(8)
    @test stp[1].source === LineNumberNode(5)

    # Try again with node 3 explicitly marked reachable
    stp = JuliaLowering.prune(st, keep=JuliaLowering.SyntaxList(g, NodeId[3, 4]))
    @test st ≈ stp
    # 1, 5, 4, 8, and now 3, 7 as well
    @test length(syntax_graph(stp).edge_ranges) === 6
    @test stp.source isa NodeId
    @test stp[1].source === LineNumberNode(5)

    orig_3 = SyntaxTree(syntax_graph(stp), stp.source)
    @test orig_3.source isa NodeId
    orig_4 = SyntaxTree(syntax_graph(stp), orig_3.source)
    @test orig_4.source === LineNumberNode(4)

    @test numchildren(orig_3) === 1
    @test numchildren(orig_4) === 1
    @test orig_3[1].source === LineNumberNode(7)
    @test orig_4[1].source === LineNumberNode(8)

    # Try again with no node provenance
    stp = JuliaLowering.prune(st, keep=nothing)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 2
    @test stp.source === LineNumberNode(4)
    @test stp[1].source === LineNumberNode(5)

    # "real world" test with lowered output---not many properties we can
    # check without fragile tests, but there are some.
    test_mod = Module()
    code = "begin; x1=1; x2=2; x3=3; x4=begin; 4; end; begin; end; end"
    st0 = parsestmt(SyntaxTree, code)
    st5 = JuliaLowering.lower(test_mod, st0)
    stp = JuliaLowering.prune(st5)
    @test st5 ≈ stp
    @test length(syntax_graph(stp).edge_ranges) < length(syntax_graph(st5).edge_ranges)
    @test stp.source isa NodeId
    @test SyntaxTree(syntax_graph(stp), stp.source) ≈ st0
    @test sourcetext(stp) == code
    # try without preserving st0
    stp = JuliaLowering.prune(st5, keep=nothing)
    @test st5 ≈ stp
    @test length(syntax_graph(stp).edge_ranges) < length(syntax_graph(st5).edge_ranges)
    @test stp.source isa SourceRef
    @test sourcetext(stp) == code
end
110283
end

0 commit comments

Comments
 (0)