Skip to content

Commit a178904

Browse files
committed
Graph utils: unalias_nodes, prune
1 parent 316f868 commit a178904

File tree

2 files changed

+311
-1
lines changed

2 files changed

+311
-1
lines changed

src/syntax_graph.jl

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,3 +815,140 @@ end
815815
# out
816816
# end
817817

818+
#-------------------------------------------------------------------------------
819+
# Data structure utilities
820+
"""
    unalias_nodes(st::SyntaxTree)

Return a tree in which every descendant of `st` has exactly one parent within
`st`. The result is identical to `st` in everything but its underlying
representation: each additional parent of a shared subtree receives its own
copy of that subtree. Beyond those copies, `unalias_nodes` should not allocate
new nodes unnecessarily.

    unalias_nodes(sl::SyntaxList)

When a `SyntaxList` is given, the resulting trees are unique with respect to
one another as well as internally. A duplicate entry produces a copied tree.
"""
function unalias_nodes(st::SyntaxTree)
    graph = syntax_graph(st)
    # Fresh bookkeeping sets: a single tree has nothing visited before its root.
    root = _unalias_nodes(graph, st._id, Set{NodeId}(), Set{Int}())
    return SyntaxTree(graph, root)
end
837+
838+
function unalias_nodes(sl::SyntaxList)
    graph = syntax_graph(sl)
    # The bookkeeping sets are shared across all entries so that the resulting
    # trees are unaliased with respect to each other, not just internally.
    visited_nodes = Set{NodeId}()
    visited_edges = Set{Int}()
    new_ids = map(sl.ids) do id
        _unalias_nodes(graph, id, visited_nodes, visited_edges)
    end
    return SyntaxList(graph, new_ids)
end
844+
845+
# Recursive worker for `unalias_nodes`. Returns the id that should stand in for
# `id` at the current position: `id` itself on the first visit, or the id of a
# fresh copy when `id` has already been reached through another parent.
#
# `seen` records the node ids visited so far and `seen_edges` the indices into
# `graph.edges` that are already owned by a visited node. Note that
# `seen_edges` only matters when edge ranges overlap, which is a situation we
# don't produce yet.
function _unalias_nodes(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    if id in seen
        # Reached through a second parent: continue with a copy instead.
        id = copy_ast(graph, SyntaxTree(graph, id); copy_source=false)._id
    end
    if any(in(seen_edges), graph.edge_ranges[id])
        # Another node is referencing our edges; move ours to the end of the
        # edge list so they can be modified without affecting that node.
        first_new = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = first_new:lastindex(graph.edges)
    end
    own_edges = graph.edge_ranges[id]
    union!(seen_edges, own_edges)
    push!(seen, id)

    for (child, edge_idx) in zip(children(graph, id), own_edges)
        new_child = _unalias_nodes(graph, child, seen, seen_edges)
        # The replacement child is the same as the old one in every way, so
        # rewrite the edge in place rather than triggering copies with
        # `mapchildren`.
        if child !== new_child
            graph.edges[edge_idx] = new_child
        end
    end
    return id
end
868+
"""
    prune(st::SyntaxTree; keep=flattened_provenance(st))

Return a tree whose graph no longer contains unreachable nodes
(non-descendants of `st`), and whose provenance data has been minimized.

If `keep` is not `nothing`, its descendants are considered reachable as well.
By default, `keep` is the final node(s) in the provenance chain of `st`, so the
result has expression provenance back to the original parsed nodes but no
lowering-internal provenance. In any case, byte (or, with old macros,
`LineNumberNode`) provenance is retained.

Provenance shrinkage: green trees are omitted from `SourceRef`s. If node A
references node B as its source and B is unreachable, A adopts the source of B.
"""
function prune(st::SyntaxTree; keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    roots = NodeId[st._id]
    if keep isa SyntaxList
        append!(roots, keep.ids)
    elseif keep isa SyntaxTree
        push!(roots, keep._id)
    end
    # `st` is always the first entrypoint, hence the first entry of the result.
    pruned = prune(syntax_graph(st), unique(roots))
    return pruned[1]
end
888+
889+
# Pruning starts by unaliasing the nodes. That undoes a small amount of the
# space savings of the DAG representation, but in return it allows us to (1)
# omit the whole `edges` array (TODO), and (2) keep the pruning algorithm
# simple. The invariant we win is that `edge_ranges` is one or more interleaved
# level-order traversals where every node's set of children is contiguous, so
# its entries can refer to itself instead of an external `edges` vector.
#
# Returns a `SyntaxList` over a new graph with one entry per entrypoint, in the
# order given. `entrypoints_a` must not contain duplicates.
function prune(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    @assert length(entrypoints_a) === length(unique(entrypoints_a))
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    graph1 = unaliased.graph
    entrypoints = unaliased.ids

    nodes1 = collect(NodeId, entrypoints) # Reachable subset of graph1
    map12 = Dict{NodeId, Int}()           # graph1 => graph2 mapping
    graph2 = ensure_attributes!(SyntaxGraph(); attrdefs(graph1)...)

    # Level-order walk: `nodes1` doubles as the work queue, and a node's index
    # in `nodes1` is its id in `graph2`.
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        map12[n1] = n2
        # The children are about to be appended to `nodes1`, so their new ids
        # start right after its current end.
        offset = length(nodes1)
        child_range = if is_leaf(graph1, n1)
            0:-1
        else
            (1:numchildren(graph1, n1)) .+ offset
        end
        push!(graph2.edge_ranges, child_range)
        append!(nodes1, children(graph1, n1))
    end
    graph2.edges = 1:length(nodes1) # our reward for unaliasing

    # Copy over every attribute except `:source`, which needs remapping below.
    for attr in attrnames(graph1)
        attr === :source && continue
        old_vals = graph1.attributes[attr]
        new_vals = graph2.attributes[attr]
        for (n2, n1) in enumerate(nodes1)
            attrval = get(old_vals, n1, nothing)
            isnothing(attrval) || (new_vals[n2] = attrval)
        end
    end

    # Prune provenance. Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    function get_resolved!(id1::NodeId)
        cached = get(resolved_sources, id1, nothing)
        isnothing(cached) || return cached
        src1 = graph1.source[id1]
        res = if haskey(map12, src1)
            # The source node survived pruning: point at its new id.
            map12[src1]
        elseif src1 isa NodeId
            # The source node was pruned: adopt its (resolved) source instead.
            get_resolved!(src1)
        elseif src1 isa Tuple
            map(get_resolved!, src1)
        elseif src1 isa SourceRef
            # Rebuild the SourceRef with `nothing` as its last field.
            SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
        else
            src1
        end
        resolved_sources[id1] = res
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end

    # The first n entries in nodes1 were our entrypoints, unique from unaliasing
    return SyntaxList(graph2, 1:length(entrypoints))
end

test/syntax_graph.jl

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@ end
6363
# Build a small `SyntaxGraph` for tests from raw `edge_ranges` and `edges`.
# Node `i` gets kind `K"block"`, source `LineNumberNode(i)` and an `:orig`
# attribute equal to `i`, which lets tests recognise copies of a node.
# `more_attrs` are extra `name => Dict` pairs; because they are passed last,
# they replace a default attribute of the same name.
function testgraph(edge_ranges, edges, more_attrs...)
    node_ids = eachindex(edge_ranges)
    kinds = Dict(map(i -> (i => K"block"), node_ids))
    sources = Dict(map(i -> (i => LineNumberNode(i)), node_ids))
    orig = Dict(map(i -> (i => i), node_ids))
    return SyntaxGraph(
        edge_ranges,
        edges,
        Dict(:kind => kinds, :source => sources,
             :orig => orig, more_attrs...))
end
7173

7274
@testset "copy_ast" begin
@@ -107,4 +109,175 @@ end
107109
# Disallow for now, since we can't prevent dangling sourcerefs
108110
@test_throws ErrorException JuliaLowering.copy_ast(new_g, st; copy_source=false)
109111
end
112+
113+
@testset "unalias_nodes" begin
    # 1 -+-> 2 ->-> 4
    #    |      |
    #    +-> 3 -+
    g = testgraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 5
    @test length(stu._graph.edges) == 4
    # Properties of node 4 should be preserved
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test st[1][1].source == stu[1][1].source == stu[2][1].source
    @test stu[1][1]._id != stu[2][1]._id

    # Try again with overlapping edge_ranges
    g = testgraph([1:2, 3:3, 3:3, 0:-1], [2, 3, 4])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 5
    @test length(stu._graph.edges) == 4
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test st[1][1].source == stu[1][1].source == stu[2][1].source
    @test stu[1][1]._id != stu[2][1]._id

    #           +-> 5
    #           |
    # 1 -+-> 2 -+---->>>-> 6
    #    |           |||
    #    +-> 3 -> 7 -+||
    #    |            ||
    #    +-> 4 -+-----+|
    #           |      |
    #           +------+
    g = testgraph([1:3, 4:5, 6:6, 7:8, 0:-1, 0:-1, 9:9],
                  [2, 3, 4, 5, 6, 7, 6, 6, 6])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    # node 6 should be copied three times
    @test length(stu._graph.edge_ranges) == 10
    @test length(stu._graph.edges) == 9
    # the four copies of node 6 should have attrs identical to the original and distinct ids
    @test 6 == stu[1][2].orig == stu[2][1][1].orig == stu[3][1].orig == stu[3][2].orig
    # `allunique` rather than a chained `!=`, which only compares neighbours
    @test allunique([stu[1][2]._id, stu[2][1][1]._id, stu[3][1]._id, stu[3][2]._id])

    # 1 -+-> 2 ->-> 4 -+----> 5 ->-> 7
    #    |      |      |         |
    #    +-> 3 -+      +-->-> 6 -+
    #        |            |
    #        +------------+
    g = testgraph([1:2, 3:3, 4:5, 6:7, 8:8, 9:9, 0:-1],
                  [2,3,4,4,6,5,6,7,7])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 15
    @test length(stu._graph.edges) == 14
    # attrs of nodes 4-7
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test 5 == stu[1][1][1].orig == stu[2][1][1].orig
    @test 6 == stu[1][1][2].orig == stu[2][1][2].orig == stu[2][2].orig
    @test 7 == stu[1][1][1][1].orig == stu[1][1][2][1].orig ==
        stu[2][1][1][1].orig == stu[2][1][2][1].orig == stu[2][2][1].orig
    # ensure no duplication: all five copies of node 7 must have distinct ids
    @test allunique([stu[1][1][1][1]._id, stu[1][1][2][1]._id,
                     stu[2][1][1][1]._id, stu[2][1][2][1]._id, stu[2][2][1]._id])
end
182+
183+
@testset "prune" begin
    # [1]-+-> 2       5 --> 6
    #     |
    #     +-> 3 --> 4    7
    g = testgraph([1:2, 0:-1, 3:3, 0:-1, 4:4, 0:-1, 0:-1], [2, 3, 4, 6])
    st = SyntaxTree(g, 1)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 4
    @test stp.source == LineNumberNode(1)
    @test stp[1].source == LineNumberNode(2)
    @test stp[2].source == LineNumberNode(3)
    @test stp[2][1].source == LineNumberNode(4)

    # (also checks that the last prune didn't destroy the graph)
    #  1 -+-> 2       5 --> 6
    #     |
    #     +-> 3 --> 4   [7]
    st = SyntaxTree(g, 7)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 1
    @test stp.orig == 7

    # 1 -+->[2]->-> 4
    #    |      |
    #    +-> 3 -+
    g = testgraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4])
    st = SyntaxTree(g, 2)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 2
    @test stp.orig == 2
    @test stp[1].orig == 4

    #  9 -->[1]--> 5      src(1) = 2
    # 10 --> 2 --> 6      src(2) = 3
    # 11 --> 3 --> 7      src(3) = 4
    # 12 --> 4 --> 8      else src(i) = line(i)
    g = testgraph([1:1, 2:2, 3:3, 4:4, 0:-1, 0:-1, 0:-1, 0:-1, 5:5, 6:6, 7:7, 8:8],
                  [5, 6, 7, 8, 1, 2, 3, 4],
                  :source => Dict(
                      1=>2, 2=>3, 3=>4,
                      map(i->(i=>LineNumberNode(i)), 4:12)...))
    st = SyntaxTree(g, 1)
    stp = JuliaLowering.prune(st)
    @test st ≈ stp
    # 1, 5, 4, 8 should remain
    @test length(syntax_graph(stp).edge_ranges) === 4
    @test stp.source isa NodeId
    orig_4 = SyntaxTree(syntax_graph(stp), stp.source)
    @test orig_4.source === LineNumberNode(4)
    @test numchildren(orig_4) === 1
    @test orig_4[1].source === LineNumberNode(8)
    @test stp[1].source === LineNumberNode(5)

    # Try again with node 3 explicitly marked reachable
    stp = JuliaLowering.prune(st, keep=JuliaLowering.SyntaxList(g, NodeId[3, 4]))
    @test st ≈ stp
    # 1, 5, 4, 8, and now 3, 7 as well
    @test length(syntax_graph(stp).edge_ranges) === 6
    @test stp.source isa NodeId
    @test stp[1].source === LineNumberNode(5)

    orig_3 = SyntaxTree(syntax_graph(stp), stp.source)
    @test orig_3.source isa NodeId
    orig_4 = SyntaxTree(syntax_graph(stp), orig_3.source)
    @test orig_4.source === LineNumberNode(4)

    @test numchildren(orig_3) === 1
    @test numchildren(orig_4) === 1
    @test orig_3[1].source === LineNumberNode(7)
    @test orig_4[1].source === LineNumberNode(8)

    # Try again with no node provenance
    stp = JuliaLowering.prune(st, keep=nothing)
    @test st ≈ stp
    @test length(syntax_graph(stp).edge_ranges) === 2
    @test stp.source === LineNumberNode(4)
    @test stp[1].source === LineNumberNode(5)

    # "real world" test with lowered output---not many properties we can
    # check without fragile tests, but there are some.
    test_mod = Module()
    code = "begin; x1=1; x2=2; x3=3; x4=begin; 4; end; begin; end; end"
    st0 = parsestmt(SyntaxTree, code)
    st5 = JuliaLowering.lower(test_mod, st0)
    stp = JuliaLowering.prune(st5)
    @test st5 ≈ stp
    @test length(syntax_graph(stp).edge_ranges) < length(syntax_graph(st5).edge_ranges)
    @test stp.source isa NodeId
    @test SyntaxTree(syntax_graph(stp), stp.source) ≈ st0
    @test sourcetext(stp) == code
    # try without preserving st0
    stp = JuliaLowering.prune(st5, keep=nothing)
    @test st5 ≈ stp
    @test length(syntax_graph(stp).edge_ranges) < length(syntax_graph(st5).edge_ranges)
    @test stp.source isa SourceRef
    @test sourcetext(stp) == code
end
110283
end

0 commit comments

Comments
 (0)