Skip to content

Commit b0cd47c

Browse files
committed
Graph utils: unalias_nodes, annotate_parent, prune
1 parent 0fa9152 commit b0cd47c

File tree

2 files changed

+315
-0
lines changed

2 files changed

+315
-0
lines changed

src/syntax_graph.jl

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,3 +763,218 @@ end
763763
# out
764764
# end
765765

766+
#-------------------------------------------------------------------------------
767+
# Data structure utilities
768+
769+
"""
770+
unalias_nodes(st::SyntaxTree)
771+
772+
Return a tree where each descendent of `st` has exactly one parent in `st`. The
773+
returned tree is identical to `st` in all but underlying representation, where
774+
every additional parent to a subtree generates a copy of that subtree. Apart
775+
from this, `unalias_nodes` should not allocate new nodes unnecessarily.
776+
777+
unalias_nodes(sl::SyntaxList)
778+
779+
If a `SyntaxList` is given, every resulting tree will be unique with respect to
780+
each other as well as internally. A duplicate id will produce a copied tree.
781+
"""
782+
function unalias_nodes(st::SyntaxTree)
    # The graph behind `st` is updated in place by `_unalias_nodes!`; the
    # returned tree is a handle onto that same graph.
    graph = syntax_graph(st)
    root = _unalias_nodes!(graph, st._id, Set{NodeId}(), Set{Int}())
    return SyntaxTree(graph, root)
end
785+
786+
function unalias_nodes(sl::SyntaxList)
    # Both sets are shared by every entry of the list, so aliasing between
    # different trees of `sl` is detected as well as aliasing within one tree.
    visited = Set{NodeId}()
    visited_edges = Set{Int}()
    graph = syntax_graph(sl)
    roots = map(sl.ids) do id
        _unalias_nodes!(graph, id, visited, visited_edges)
    end
    return SyntaxList(graph, roots)
end
792+
793+
# Note that `seen_edges` is only needed for when edge ranges overlap, which is a
794+
# situation we don't produce yet.
795+
# Recursive worker for `unalias_nodes`. Returns the id that the caller should
# use in place of `id`: `id` itself on the first visit, or the root of a fresh
# copy when `id` has been reached before. `seen` and `seen_edges` accumulate
# the node ids and `graph.edges` indices claimed so far.
function _unalias_nodes!(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    if id in seen
        # Reached through an additional parent. Every node produced by
        # `copy_ast` is fresh, unreferenced and distinct from all others, so
        # the copy needs no further recursion.
        duplicate = copy_ast(graph, SyntaxTree(graph, id); copy_source=false)
        return duplicate._id
    end
    if any(in(seen_edges), graph.edge_ranges[id])
        # Some other node already claimed part of our edge range; move our
        # edges to the end of `edges` so they can be modified freely.
        first_new = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = first_new:lastindex(graph.edges)
    end
    slots = graph.edge_ranges[id]
    union!(seen_edges, slots)
    push!(seen, id)

    for (child, slot) in zip(children(graph, id), slots)
        replacement = _unalias_nodes!(graph, child, seen, seen_edges)
        # A replacement is the same as the old child in every way, so patch
        # the edge in place rather than triggering copies with `mapchildren`.
        if child !== replacement
            graph.edges[slot] = replacement
        end
    end
    return id
end
818+
819+
"""
820+
Give each descendent of `st` a `parent::NodeId` attribute.
821+
"""
822+
function annotate_parent!(st::SyntaxTree)
    graph = unfreeze_attrs(syntax_graph(st))
    # Unalias first so that every node has a single, well-defined parent.
    root = unalias_nodes(SyntaxTree(graph, st._id))
    ensure_attributes!(graph; parent=NodeId)
    root_id = root._id
    # Only descendants are annotated; the root is not given a `parent` here.
    return mapchildren(syntax_graph(root), root) do child
        _annotate_parent!(child, root_id)
    end
end
828+
829+
# Record `pid` as the parent of `st`, then recurse into the children of `st`
# with `st` as their parent.
function _annotate_parent!(st::SyntaxTree, pid::NodeId)
    setattr!(st; parent=pid)
    own_id = st._id
    return mapchildren(syntax_graph(st), st) do child
        _annotate_parent!(child, own_id)
    end
end
833+
834+
"""
835+
Return a tree where unreachable nodes (non-descendents of `st`) in its graph
836+
have been deleted, and where provenance data has been minimized.
837+
838+
If `keep` is not nothing, also consider descendents of it reachable. By
839+
default, `keep` is the final node(s) in the provenance chain of `st`. This
840+
means that, by default, we have expression provenance back to the original
841+
parsed nodes, but no lowering-internal provenance. In any case, we still retain
842+
byte (or, with old macros, LineNumberNode) provenance.
843+
844+
Provenance shrinkage: Green trees are omitted from SourceRefs. If node A
845+
references node B as its source and B is unreachable, A adopts the source of B.
846+
847+
No attributes are deleted (but that can be done separately). Possible TODO:
848+
Non-sparse attributes could be stored more compactly
849+
"""
850+
function prune(st::SyntaxTree; keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    # `st` is always the first entrypoint, which is why the first element of
    # the list returned by `prune_impl` is the one handed back.
    roots = NodeId[st._id]
    if keep isa SyntaxList
        append!(roots, keep.ids)
    elseif keep isa SyntaxTree
        push!(roots, keep._id)
    end
    return prune_impl(syntax_graph(st), roots)[1]
end
856+
857+
# Pruning variant that tolerates aliased (multi-parent) nodes.
#
# Copies every node reachable from `entrypoints` in `graph1` into a fresh
# graph, visiting nodes in breadth-first discovery order, and returns a
# `SyntaxList` over the new graph with one entry per unique entrypoint (in
# their original order). All attributes except `:source` are copied verbatim;
# `:source` is rewritten so that it never refers to a node that was dropped.
function prune_a(graph1::SyntaxGraph, entrypoints::Vector{NodeId})
    entrypoints = unique(entrypoints)
    # Reachable subset of graph1 in discovery order. A node's position in this
    # vector is its id in graph2; nodes encountered more than once appear once.
    nodes1 = copy(entrypoints)
    # graph1 => graph2 mapping. Ids are assigned as soon as a node is
    # discovered (not when it is processed) so that an edge to a node which
    # has not been processed yet can be written immediately.
    map12 = Dict{NodeId, Int}(n1 => n2 for (n2, n1) in enumerate(nodes1))
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n1 = nodes1[length(graph2.edge_ranges) + 1]
        # Parenthesize the range: `:` binds looser than `.+`, so without the
        # parentheses the offset would be added to the upper bound only.
        push!(graph2.edge_ranges, is_leaf(graph1, n1) ?
            (0:-1) : (1:numchildren(graph1, n1)) .+ length(graph2.edges))
        for c1 in children(graph1, n1)
            c2 = get!(map12, c1) do
                push!(nodes1, c1)
                length(nodes1)
            end
            push!(graph2.edges, c2)
        end
    end

    for attr in attrnames(graph1)
        attr === :source && continue # provenance is rewritten below
        vals1 = graph1.attributes[attr]
        vals2 = graph2.attributes[attr]
        for (n2, n1) in enumerate(nodes1)
            attrval = get(vals1, n1, nothing)
            isnothing(attrval) || (vals2[n2] = attrval)
        end
    end

    # Prune provenance: a source that points at a dropped node is replaced by
    # that node's own (recursively resolved) source.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 id => graph2 source
    function get_resolved!(id1::NodeId)
        cached = get(resolved_sources, id1, nothing)
        isnothing(cached) || return cached
        src1 = graph1.source[id1]
        res = if haskey(map12, src1)
            map12[src1]
        elseif src1 isa NodeId
            get_resolved!(src1)
        elseif src1 isa Tuple
            # NOTE(review): each tuple element is replaced by its own resolved
            # source, even when the element itself is reachable — confirm that
            # this is intended.
            map(get_resolved!, src1)
        elseif src1 isa SourceRef
            # Rebuild the SourceRef with `nothing` as its last field.
            SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
        else
            src1
        end
        resolved_sources[id1] = res
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end
    return SyntaxList(graph2, [map12[id] for id in entrypoints])
end
912+
913+
# Experiment: What can we gain if we are allowed to assume nodes are unaliased?
914+
915+
# This undoes a small amount of space savings from the DAG representation, but
916+
# it allows us to (1) omit the whole `edges` array (TODO), and (2) make the
917+
# pruning algorithm simpler. The invariant we win is having `edge_ranges` be
918+
# one or more interleaved level-order traversals where every node's set of
919+
# children is contiguous, so its entries can refer to itself instead of `edges`.
920+
# Pruning variant that unaliases the entrypoints first.
#
# After `unalias_nodes` no node is reached twice, so a breadth-first walk lays
# out each node's children contiguously in `nodes1` and a node's edge range
# can point straight at its children's ids. Returns a `SyntaxList` over the
# new graph whose entries correspond, in order, to the unaliased entrypoints.
function prune_u(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    (graph1, entrypoints) = (unaliased.graph, unaliased.ids)
    nodes1 = collect(NodeId, entrypoints) # Reachable subset of graph1
    map12 = Dict{NodeId, Int}() # graph1 => graph2 mapping
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        map12[n1] = n2
        # The children are about to be appended to `nodes1`, so their graph2
        # ids are the next `numchildren` positions after its current end.
        push!(graph2.edge_ranges, is_leaf(graph1, n1) ?
            (0:-1) : (1:numchildren(graph1, n1)) .+ length(nodes1))
        append!(nodes1, children(graph1, n1))
    end
    # Our reward for unaliasing: `edges` is the identity mapping. Fill the
    # existing (empty) vector in place rather than rebinding the field, which
    # would need a mutable graph and an implicit range-to-vector conversion.
    append!(graph2.edges, 1:length(nodes1))

    for attr in attrnames(graph1)
        attr === :source && continue # provenance is rewritten below
        vals1 = graph1.attributes[attr]
        vals2 = graph2.attributes[attr]
        for (n2, n1) in enumerate(nodes1)
            attrval = get(vals1, n1, nothing)
            isnothing(attrval) || (vals2[n2] = attrval)
        end
    end

    # Prune provenance. Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 id => graph2 source
    function get_resolved!(id1::NodeId)
        cached = get(resolved_sources, id1, nothing)
        isnothing(cached) || return cached
        src1 = graph1.source[id1]
        res = if haskey(map12, src1)
            map12[src1]
        elseif src1 isa NodeId
            # Source node was dropped: adopt that node's own source instead.
            get_resolved!(src1)
        elseif src1 isa Tuple
            map(get_resolved!, src1)
        elseif src1 isa SourceRef
            # Rebuild the SourceRef with `nothing` as its last field.
            SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
        else
            src1
        end
        resolved_sources[id1] = res
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end

    # The first n entries in nodes1 were our entrypoints, unique from unaliasing
    return SyntaxList(graph2, 1:length(entrypoints))
end
979+
980+
# Implementation that `prune` delegates to; currently bound to the variant
# that unaliases nodes before pruning.
const prune_impl = prune_u

test/syntax_graph.jl

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,104 @@
1616
@test kind(tree2) == K"block"
1717
@test kind(tree2[1]) == K"Identifier" && tree2[1].name_val == "x"
1818
@test kind(tree2[2]) == K"Identifier" && tree2[2].name_val == "some_unique_identifier"
19+
20+
# Map every id in `range` to the kind `K"block"`.
nblocks(range) = Dict(i => K"block" for i in range)
21+
# Give every id in `range` the source `LineNumberNode(id, "file")`.
nlines(range) = Dict{NodeId, JuliaLowering.SourceAttrType}(
    i => LineNumberNode(i, "file") for i in range)
23+
24+
@testset "unalias_nodes" begin
    # 1 -+-> 2 ->-> 4
    #    |      |
    #    +-> 3 -+
    diamond = SyntaxGraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4],
                          Dict(:kind => nblocks(1:4), :source => nlines(1:4),
                               :foo => Dict(4=>"foo")))
    tree_a = SyntaxTree(diamond, 1)
    flat_a = JuliaLowering.unalias_nodes(tree_a)
    # Node 4 is the only node that needs a copy; no extra edges are required.
    @test flat_a._graph.edge_ranges == [1:2, 3:3, 4:4, 0:-1, 0:-1]
    @test flat_a._graph.edges == [2, 3, 4, 5]
    # The copy keeps the attributes of node 4.
    @test tree_a[1][1].foo == flat_a[1][1].foo == flat_a[2][1].foo
    @test tree_a[1][1].source == flat_a[1][1].source == flat_a[2][1].source

    # Same shape, but nodes 2 and 3 share one (overlapping) edge range.
    overlapping = SyntaxGraph([1:2, 3:3, 3:3, 0:-1], [2, 3, 4],
                              Dict(:kind => nblocks(1:4), :source => nlines(1:4),
                                   :foo => Dict(4=>"foo")))
    tree_b = SyntaxTree(overlapping, 1)
    flat_b = JuliaLowering.unalias_nodes(tree_b)
    @test flat_b._graph.edge_ranges == [1:2, 3:3, 4:4, 0:-1, 0:-1]
    @test flat_b._graph.edges == [2, 3, 4, 5]
    @test tree_b[1][1].foo == flat_b[1][1].foo == flat_b[2][1].foo
    @test tree_b[1][1].source == flat_b[1][1].source == flat_b[2][1].source

    #           +-> 5
    #           |
    # 1 -+-> 2 -+---->>>-> 6
    #    |           |||
    #    +-> 3 -> 7 -+||
    #    |            ||
    #    +-> 4 -+-----+|
    #           |      |
    #           +------+
    fan_in = SyntaxGraph([1:3, 4:5, 6:6, 7:8, 0:-1, 0:-1, 9:9],
                         [2, 3, 4, 5, 6, 7, 6, 6, 6],
                         Dict(:kind => nblocks(1:7), :source => nlines(1:7),
                              :foo => Dict(6=>"foo")))
    tree_c = SyntaxTree(fan_in, 1)
    flat_c = JuliaLowering.unalias_nodes(tree_c)
    # Node 6 has four incoming edges, so three copies of it are expected.
    @test length(flat_c._graph.edge_ranges) == 10
    @test length(flat_c._graph.edges) == 9
    @test tree_c[1][2].foo == flat_c[1][2].foo == flat_c[2][1][1].foo == flat_c[3][1].foo == flat_c[3][2].foo
    @test tree_c[1][2].source == flat_c[1][2].source == flat_c[2][1][1].source == flat_c[3][1].source == flat_c[3][2].source

    # 1 -+-> 2 ->-> 4 -+----> 5 ->-> 7
    #    |      |      |         |
    #    +-> 3 -+      +-->-> 6 -+
    #           |         |
    #           +---------+
    nested = SyntaxGraph([1:2, 3:3, 4:5, 6:7, 8:8, 9:9, 0:-1],
                         [2,3,4,4,6,5,6,7,7],
                         Dict(:kind => nblocks(1:7), :source => nlines(1:7),
                              :foo => Dict(4=>4, 5=>5, 6=>6, 7=>7)))
    tree_d = SyntaxTree(nested, 1)
    flat_d = JuliaLowering.unalias_nodes(tree_d)
    @test length(flat_d._graph.edge_ranges) == 15
    @test length(flat_d._graph.edges) == 14
    # Every occurrence of node 4
    @test tree_d[1][1].foo == flat_d[1][1].foo == flat_d[2][1].foo
    # Every occurrence of node 5
    @test tree_d[1][1][1].foo == flat_d[1][1][1].foo == flat_d[2][1][1].foo
    # Every occurrence of node 6
    @test tree_d[1][1][2].foo == flat_d[1][1][2].foo == flat_d[2][1][2].foo == flat_d[2][2].foo
    # Every occurrence of node 7
    @test tree_d[1][1][1][1].foo == flat_d[1][1][1][1].foo == flat_d[1][1][2][1].foo ==
        flat_d[2][1][1][1].foo == flat_d[2][1][2][1].foo == flat_d[2][2][1].foo
end
94+
95+
@testset "annotate_parent" begin
    # True when `tree` carries `expected` as its `parent` attribute (`nothing`
    # meaning "no such attribute") and every descendant points at its parent.
    function chk_parent(tree, expected)
        get(tree, :parent, nothing) === expected || return false
        return all(child -> chk_parent(child, tree._id), children(tree))
    end
    # 1 -+-> 2 ->-> 4 --> 5
    #    |      |
    #    +-> 3 -+
    dict_graph = SyntaxGraph([1:2, 3:3, 4:4, 5:5, 0:-1], [2, 3, 4, 4, 5],
                             Dict(:kind => nblocks(1:5), :source => nlines(1:5)))
    dict_tree = JuliaLowering.annotate_parent!(SyntaxTree(dict_graph, 1))
    @test chk_parent(dict_tree, nothing)
    # Same graph with NamedTuple-based attrs
    nt_graph = SyntaxGraph([1:2, 3:3, 4:4, 5:5, 0:-1], [2, 3, 4, 4, 5],
                           (;kind=nblocks(1:5), source=nlines(1:5)))
    nt_tree = JuliaLowering.annotate_parent!(SyntaxTree(nt_graph, 1))
    @test chk_parent(nt_tree, nothing)
end
111+
112+
@testset "prune" begin
    # For now this only checks that lowering followed by pruning runs.
    scratch_mod = Module()
    parsed = parsestmt(SyntaxTree, "function foo end")
    lowered = JuliaLowering.lower(scratch_mod, parsed)
    pruned = JuliaLowering.prune(lowered)
    # TODO
end
19119
end

0 commit comments

Comments
 (0)