Skip to content

Commit ab6c8b9

Browse files
committed
Graph utils: unalias_nodes, annotate_parent, prune
1 parent 79159b6 commit ab6c8b9

File tree

2 files changed

+315
-0
lines changed

2 files changed

+315
-0
lines changed

src/syntax_graph.jl

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,3 +761,218 @@ end
761761
# out
762762
# end
763763

764+
#-------------------------------------------------------------------------------
765+
# Data structure utilities
766+
"""
    unalias_nodes(st::SyntaxTree)

Return a tree in which every descendant of `st` has exactly one parent within
`st`. The result is the same tree as `st` in everything but its underlying
representation: each additional parent of a shared subtree receives its own
copy of that subtree. Beyond those copies, `unalias_nodes` should not allocate
new nodes unnecessarily.

The returned tree lives in the same graph as `st`; that graph is updated in
place (copies are appended and edges are rewired to point at them).

    unalias_nodes(sl::SyntaxList)

When a `SyntaxList` is given, the resulting trees are unique with respect to
each other as well as internally. A duplicate id produces a copied tree.
"""
function unalias_nodes(st::SyntaxTree)
    graph = syntax_graph(st)
    # A single tree starts with empty bookkeeping: nothing has been visited yet.
    root_id = _unalias_nodes!(graph, st._id, Set{NodeId}(), Set{Int}())
    return SyntaxTree(graph, root_id)
end
783+
784+
# `SyntaxList` method: the visited-node and visited-edge sets are shared across
# all list entries, so a node reached from a later entry that was already
# reached from an earlier one is copied rather than shared.
function unalias_nodes(sl::SyntaxList)
    graph = syntax_graph(sl)
    visited_nodes = Set{NodeId}()
    visited_edges = Set{Int}()
    unaliased_ids = map(sl.ids) do id
        _unalias_nodes!(graph, id, visited_nodes, visited_edges)
    end
    return SyntaxList(graph, unaliased_ids)
end
790+
791+
# Recursive worker for `unalias_nodes`. Returns the id that the caller should
# use in place of `id`: either `id` itself (first visit) or the root of a fresh
# copy (any later visit). `seen` collects visited node ids and `seen_edges`
# collects the indices into `graph.edges` that visited nodes own.
#
# Note that `seen_edges` is only needed for when edge ranges overlap, which is a
# situation we don't produce yet.
function _unalias_nodes!(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    if id in seen
        # Every node produced by `copy_ast` is fresh, unreferenced and distinct
        # from all others, so the copy needs no further recursion.
        duplicate = copy_ast(graph, SyntaxTree(graph, id); copy_source=false)
        return duplicate._id
    end

    if any(in(seen_edges), graph.edge_ranges[id])
        # Another node already claimed some of our edge slots. Move our edges to
        # the end of the array so that we can rewrite them without affecting it.
        first_new = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = first_new:lastindex(graph.edges)
    end
    union!(seen_edges, graph.edge_ranges[id])
    push!(seen, id)

    for (child, edge_idx) in zip(children(graph, id), graph.edge_ranges[id])
        replacement = _unalias_nodes!(graph, child, seen, seen_edges)
        # The replacement is the same as the old child in every way, so patch
        # the edge directly instead of triggering copies with `mapchildren`.
        if child !== replacement
            graph.edges[edge_idx] = replacement
        end
    end
    return id
end
816+
"""
    annotate_parent!(st::SyntaxTree)

Give each descendant of `st` a `parent::NodeId` attribute.

The tree is passed through `unalias_nodes` first, so each descendant has a
single parent to record. The root `st` itself is not assigned a `parent`.
"""
function annotate_parent!(st::SyntaxTree)
    graph = unfreeze_attrs(syntax_graph(st))
    root = unalias_nodes(SyntaxTree(graph, st._id))
    ensure_attributes!(graph; parent=NodeId)
    root_id = root._id
    return mapchildren(syntax_graph(root), root) do child
        _annotate_parent!(child, root_id)
    end
end
826+
827+
# Record `pid` as the parent of `st`, then recurse into the children of `st`
# with `st` as their parent.
function _annotate_parent!(st::SyntaxTree, pid::NodeId)
    setattr!(st; parent=pid)
    own_id = st._id
    return mapchildren(syntax_graph(st), st) do child
        _annotate_parent!(child, own_id)
    end
end
831+
"""
    prune(st::SyntaxTree; keep=flattened_provenance(st))

Return a tree whose graph no longer contains unreachable nodes (nodes that are
not descendants of `st`), and whose provenance data has been minimized.

If `keep` is not `nothing`, descendants of `keep` are also treated as
reachable. By default, `keep` is the final node(s) in the provenance chain of
`st`. With that default we retain expression provenance back to the original
parsed nodes, but no lowering-internal provenance. In any case, byte (or, with
old macros, `LineNumberNode`) provenance is retained.

Provenance shrinkage: green trees are omitted from `SourceRef`s. If node A
references node B as its source and B is unreachable, A adopts the source of B.

No attributes are deleted (but that can be done separately). Possible TODO:
non-sparse attributes could be stored more compactly.
"""
function prune(st::SyntaxTree; keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    # `st` is always the first entrypoint, so it is the first entry of the result.
    entrypoints = NodeId[st._id]
    if keep isa SyntaxList
        append!(entrypoints, keep.ids)
    elseif keep isa SyntaxTree
        push!(entrypoints, keep._id)
    end
    pruned = prune_impl(syntax_graph(st), entrypoints)
    return pruned[1]
end
854+
855+
# Build a new graph containing only the nodes of `graph1` reachable from
# `entrypoints`, without first unaliasing `graph1`: a node reachable through
# several parents appears once in the new graph and is shared by those parents.
#
# Returns a `SyntaxList` over the new graph with one entry per unique
# entrypoint, in order of first occurrence.
function prune_a(graph1::SyntaxGraph, entrypoints::Vector{NodeId})
    entrypoints = unique(entrypoints)
    nodes1 = copy(entrypoints) # note nodes encountered >once appear once
    # graph1 => graph2 mapping. An id is assigned as soon as a node is
    # discovered (not when it is processed), so that a parent can write the
    # edge to a child that has not had its own turn in the loop yet.
    map12 = Dict{NodeId, Int}(n1 => n2 for (n2, n1) in enumerate(nodes1))
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        # `:` binds more loosely than `.+`, so the range must be parenthesized
        # before it is shifted past the edges already written.
        push!(graph2.edge_ranges, is_leaf(graph1, n1) ?
            (0:-1) : ((1:numchildren(graph1, n1)) .+ length(graph2.edges)))
        for c1 in children(graph1, n1)
            c2 = get!(map12, c1) do
                # First time we see this child: queue it and give it the next id.
                push!(nodes1, c1)
                length(nodes1)
            end
            push!(graph2.edges, c2)
        end
    end

    # Copy every attribute except `source`, which needs remapping (below).
    for attr in attrnames(graph1)
        attr === :source && continue
        for (n2, n1) in enumerate(nodes1)
            attrval = get(graph1.attributes[attr], n1, nothing)
            if !isnothing(attrval)
                graph2.attributes[attr][n2] = attrval
            end
        end
    end

    # Prune provenance. A `.source` may point at a node that was not kept, in
    # which case the referencing node adopts that node's (resolved) source.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    function get_resolved!(id1::NodeId)
        res = get(resolved_sources, id1, nothing)
        if isnothing(res)
            src1 = graph1.source[id1]
            res = if haskey(map12, src1)
                map12[src1]
            elseif src1 isa NodeId
                get_resolved!(src1)
            elseif src1 isa Tuple
                # NOTE(review): this resolves the *source of* each element, even
                # for elements that are themselves in `map12` -- confirm intended.
                map(get_resolved!, src1)
            elseif src1 isa SourceRef
                # Rebuild the SourceRef with `nothing` in its last field.
                SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
            else
                src1
            end
            resolved_sources[id1] = res
        end
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end
    return SyntaxList(graph2, map(id->map12[id], entrypoints))
end
910+
911+
# Experiment: What can we gain if we are allowed to assume nodes are unaliased?
#
# Unaliasing gives back a small amount of the space the DAG representation
# saves, but in exchange we can (1) omit the whole `edges` array (TODO), and
# (2) make the pruning algorithm simpler. The invariant we win is having
# `edge_ranges` be one or more interleaved level-order traversals where every
# node's set of children is contiguous, so its entries can refer to itself
# instead of `edges`.
function prune_u(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    graph1 = unaliased.graph
    entrypoints = unaliased.ids
    reachable = collect(NodeId, entrypoints)  # Reachable subset of graph1
    old2new = Dict{NodeId, Int}()             # graph1 => graph2 mapping
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(reachable)
        new_id = length(graph2.edge_ranges) + 1
        old_id = reachable[new_id]
        old2new[old_id] = new_id
        # Children are queued directly after everything discovered so far, so
        # their new ids are the next `numchildren` positions in `reachable`.
        child_range = if is_leaf(graph1, old_id)
            0:-1
        else
            (1:numchildren(graph1, old_id)) .+ length(reachable)
        end
        push!(graph2.edge_ranges, child_range)
        append!(reachable, children(graph1, old_id))
    end
    graph2.edges = 1:length(reachable) # our reward for unaliasing

    # Copy every attribute except `source`, which is remapped separately below.
    for attr in attrnames(graph1)
        attr === :source && continue
        for (new_id, old_id) in enumerate(reachable)
            attrval = get(graph1.attributes[attr], old_id, nothing)
            if !isnothing(attrval)
                graph2.attributes[attr][new_id] = attrval
            end
        end
    end

    # Prune provenance. Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    function get_resolved!(id1::NodeId)
        cached = get(resolved_sources, id1, nothing)
        isnothing(cached) || return cached
        src1 = graph1.source[id1]
        resolved = if haskey(old2new, src1)
            old2new[src1]
        elseif src1 isa NodeId
            # The source node was not kept: adopt its source instead.
            get_resolved!(src1)
        elseif src1 isa Tuple
            map(get_resolved!, src1)
        elseif src1 isa SourceRef
            # Rebuild the SourceRef with `nothing` in its last field.
            SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
        else
            src1
        end
        resolved_sources[id1] = resolved
        return resolved
    end

    for (new_id, old_id) in enumerate(reachable)
        graph2.source[new_id] = get_resolved!(old_id)
    end

    # The first n entries in `reachable` were our entrypoints, unique from
    # unaliasing.
    return SyntaxList(graph2, 1:length(entrypoints))
end

# Implementation used by `prune`.
const prune_impl = prune_u

test/syntax_graph.jl

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,104 @@
1616
@test kind(tree2) == K"block"
1717
@test kind(tree2[1]) == K"Identifier" && tree2[1].name_val == "x"
1818
@test kind(tree2[2]) == K"Identifier" && tree2[2].name_val == "some_unique_identifier"
19+
20+
# Fixture helper: a `kind` attribute table marking every id in `range` as K"block".
function nblocks(range)
    kind_pairs = map(range) do i
        i => K"block"
    end
    return Dict(kind_pairs)
end

# Fixture helper: a `source` attribute table giving id `i` the location "file", line `i`.
function nlines(range)
    return Dict{NodeId, JuliaLowering.SourceAttrType}(
        i => LineNumberNode(i, "file") for i in range)
end
23+
24+
@testset "unalias_nodes" begin
    # Diamond: node 4 is shared by nodes 2 and 3.
    #
    #   1 -+-> 2 ->-> 4
    #      |      |
    #      +-> 3 -+
    graph = SyntaxGraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4],
                        Dict(:kind => nblocks(1:4), :source => nlines(1:4),
                             :foo => Dict(4=>"foo")))
    tree = SyntaxTree(graph, 1)
    unaliased = JuliaLowering.unalias_nodes(tree)
    # Only node 4 should be copied, and no new edges are needed.
    @test unaliased._graph.edge_ranges == [1:2, 3:3, 4:4, 0:-1, 0:-1]
    @test unaliased._graph.edges == [2, 3, 4, 5]
    # Properties of node 4 should be preserved
    @test tree[1][1].foo == unaliased[1][1].foo == unaliased[2][1].foo
    @test tree[1][1].source == unaliased[1][1].source == unaliased[2][1].source

    # Same diamond again, but nodes 2 and 3 share a single edge slot
    # (overlapping edge_ranges).
    graph = SyntaxGraph([1:2, 3:3, 3:3, 0:-1], [2, 3, 4],
                        Dict(:kind => nblocks(1:4), :source => nlines(1:4),
                             :foo => Dict(4=>"foo")))
    tree = SyntaxTree(graph, 1)
    unaliased = JuliaLowering.unalias_nodes(tree)
    @test unaliased._graph.edge_ranges == [1:2, 3:3, 4:4, 0:-1, 0:-1]
    @test unaliased._graph.edges == [2, 3, 4, 5]
    @test tree[1][1].foo == unaliased[1][1].foo == unaliased[2][1].foo
    @test tree[1][1].source == unaliased[1][1].source == unaliased[2][1].source

    # Node 6 has four incoming edges (from 2, from 7, and twice from 4).
    #
    #             +-> 5
    #             |
    #   1 -+-> 2 -+---->>>-> 6
    #      |           |||
    #      +-> 3 -> 7 -+||
    #      |            ||
    #      +-> 4 -+-----+|
    #             |      |
    #             +------+
    graph = SyntaxGraph([1:3, 4:5, 6:6, 7:8, 0:-1, 0:-1, 9:9],
                        [2, 3, 4, 5, 6, 7, 6, 6, 6],
                        Dict(:kind => nblocks(1:7), :source => nlines(1:7),
                             :foo => Dict(6=>"foo")))
    tree = SyntaxTree(graph, 1)
    unaliased = JuliaLowering.unalias_nodes(tree)
    # node 6 should be copied three times
    @test length(unaliased._graph.edge_ranges) == 10
    @test length(unaliased._graph.edges) == 9
    @test tree[1][2].foo == unaliased[1][2].foo == unaliased[2][1][1].foo == unaliased[3][1].foo == unaliased[3][2].foo
    @test tree[1][2].source == unaliased[1][2].source == unaliased[2][1][1].source == unaliased[3][1].source == unaliased[3][2].source

    # Shared nodes that themselves have shared descendants.
    #
    #   1 -+-> 2 ->-> 4 -+----> 5 ->-> 7
    #      |      |      |         |
    #      +-> 3 -+      +-->-> 6 -+
    #          |            |
    #          +------------+
    graph = SyntaxGraph([1:2, 3:3, 4:5, 6:7, 8:8, 9:9, 0:-1],
                        [2,3,4,4,6,5,6,7,7],
                        Dict(:kind => nblocks(1:7), :source => nlines(1:7),
                             :foo => Dict(4=>4, 5=>5, 6=>6, 7=>7)))
    tree = SyntaxTree(graph, 1)
    unaliased = JuliaLowering.unalias_nodes(tree)
    @test length(unaliased._graph.edge_ranges) == 15
    @test length(unaliased._graph.edges) == 14
    # node 4
    @test tree[1][1].foo == unaliased[1][1].foo == unaliased[2][1].foo
    # node 5
    @test tree[1][1][1].foo == unaliased[1][1][1].foo == unaliased[2][1][1].foo
    # node 6
    @test tree[1][1][2].foo == unaliased[1][1][2].foo == unaliased[2][1][2].foo == unaliased[2][2].foo
    # node 7
    @test tree[1][1][1][1].foo == unaliased[1][1][1][1].foo == unaliased[1][1][2][1].foo ==
        unaliased[2][1][1][1].foo == unaliased[2][1][2][1].foo == unaliased[2][2][1].foo
end
94+
95+
@testset "annotate_parent" begin
    # True when `st` carries `parent` as its `:parent` attribute (or no
    # attribute at all when `parent === nothing`) and every descendant
    # records its own parent's id.
    function chk_parent(st, parent)
        get(st, :parent, nothing) === parent || return false
        return all(c->chk_parent(c, st._id), children(st))
    end

    #   1 -+-> 2 ->-> 4 --> 5
    #      |      |
    #      +-> 3 -+
    edge_ranges = [1:2, 3:3, 4:4, 5:5, 0:-1]
    edges = [2, 3, 4, 4, 5]

    # Dict-based attrs
    graph = SyntaxGraph(copy(edge_ranges), copy(edges),
                        Dict(:kind => nblocks(1:5), :source => nlines(1:5)))
    annotated = JuliaLowering.annotate_parent!(SyntaxTree(graph, 1))
    @test chk_parent(annotated, nothing)

    # NamedTuple-based attrs
    graph = SyntaxGraph(copy(edge_ranges), copy(edges),
                        (;kind=nblocks(1:5), source=nlines(1:5)))
    annotated = JuliaLowering.annotate_parent!(SyntaxTree(graph, 1))
    @test chk_parent(annotated, nothing)
end
111+
112+
@testset "prune" begin
    # Smoke test only: lower a small definition and prune the result. There are
    # no assertions on the pruned tree yet.
    scratch_mod = Module()
    parsed = parsestmt(SyntaxTree, "function foo end")
    lowered = JuliaLowering.lower(scratch_mod, parsed)
    pruned = JuliaLowering.prune(lowered)
    # TODO
end
19119
end

0 commit comments

Comments
 (0)