Skip to content

Commit 7060b0c

Browse files
committed
Graph utils: unalias_nodes, annotate_parent, prune
1 parent 1bfae9d commit 7060b0c

File tree

2 files changed

+431
-1
lines changed

2 files changed

+431
-1
lines changed

src/syntax_graph.jl

Lines changed: 235 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,9 @@ end
450450

451451
# Render one attribute as a short label for the tree dump: just `name` for
# arbitrary values, or `name=value` for the value types that print compactly.
attrsummary(name, value) = string(name)
attrsummary(name, value::Union{Number, Symbol}) = string(name, "=", value)
# Strings are quoted and escaped so that whitespace/newlines stay on one line.
attrsummary(name, value::String) = string(name, "=\"", Base.escape_string(value), "\"")
# Only the line of a `LineNumberNode` is shown, prefixed with `L`.
attrsummary(name, value::LineNumberNode) = string(name, "=L", value.line)
453456

454457
function _value_string(ex)
455458
k = kind(ex)
@@ -490,6 +493,16 @@ function _value_string(ex)
490493
return str
491494
end
492495

496+
# Build a compact, bracketed provenance label for `ex`, e.g. `[L12,40:57]`.
# Each entry of the flattened provenance contributes either `L<line>` (when its
# source is a `LineNumberNode`) or `<start>:<stop>` taken from its byte range.
function _short_locstr(ex)
    pieces = String[]
    for p in flattened_provenance(ex)
        if p.source isa LineNumberNode
            push!(pieces, "L$(p.source.line)")
        else
            rng = byte_range(p)
            push!(pieces, "$(rng.start):$(rng.stop)")
        end
    end
    return "[" * join(pieces, ",") * "]"
end
505+
493506
function _show_syntax_tree(io, ex, indent, show_kinds)
494507
val = get(ex, :value, nothing)
495508
nodestr = !is_leaf(ex) ? "[$(untokenize(head(ex)))]" : _value_string(ex)
@@ -499,9 +512,10 @@ function _show_syntax_tree(io, ex, indent, show_kinds)
499512
treestr = treestr*" :: "*string(kind(ex))
500513
end
501514

502-
std_attrs = Set([:name_val,:value,:kind,:syntax_flags,:source,:var_id])
515+
std_attrs = Set([:kind, :source])
503516
attrstr = join([attrsummary(n, getproperty(ex, n))
504517
for n in attrnames(ex) if n ∉ std_attrs], ",")
518+
attrstr = "$(_short_locstr(ex)) ($(ex._id)) $attrstr"
505519
treestr = string(rpad(treestr, 60), "$attrstr")
506520

507521
println(io, treestr)
@@ -798,3 +812,223 @@ end
798812
# out
799813
# end
800814

815+
#-------------------------------------------------------------------------------
816+
# Data structure utilities
817+
818+
"""
    unalias_nodes(st::SyntaxTree)

Return a tree in which every descendant of `st` has exactly one parent within
`st`. The result is identical to `st` in everything but its underlying
representation: each additional parent of a subtree receives its own copy of
that subtree. Apart from those copies, `unalias_nodes` should not allocate new
nodes unnecessarily. The graph of `st` is updated in place (edges are rewritten
to point at the copies).

    unalias_nodes(sl::SyntaxList)

When given a `SyntaxList`, every resulting tree is unique with respect to the
other trees in the list as well as internally. A duplicate id in the list
produces a copied tree.
"""
function unalias_nodes(st::SyntaxTree)
    graph = syntax_graph(st)
    root = _unalias_nodes!(graph, st._id, Set{NodeId}(), Set{Int}())
    return SyntaxTree(graph, root)
end

function unalias_nodes(sl::SyntaxList)
    graph = syntax_graph(sl)
    # Shared across all list entries so that trees are also unaliased from
    # each other, not just internally.
    seen = Set{NodeId}()
    seen_edges = Set{Int}()
    ids = map(sl.ids) do id
        _unalias_nodes!(graph, id, seen, seen_edges)
    end
    return SyntaxList(graph, ids)
end
841+
842+
# Recursive worker for `unalias_nodes`. Returns the id to use for `id` in the
# unaliased tree: `id` itself on first visit, or the root of a copy when `id`
# has already been visited.
#
# `seen` holds the node ids visited so far; `seen_edges` holds the indices into
# `graph.edges` owned by those nodes. Note that `seen_edges` only matters when
# edge ranges overlap, which is a situation we don't produce yet.
function _unalias_nodes!(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    # Nodes produced by `copy_ast` are all fresh/unreferenced and distinct
    # from each other, so a copy needs no further recursion.
    id in seen && return copy_ast(graph, SyntaxTree(graph, id); copy_source=false)._id

    if any(in(seen_edges), graph.edge_ranges[id])
        # Another node references our edge slots; move our edges to fresh
        # slots at the end of `graph.edges` so we can modify them freely.
        first_new = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = first_new:lastindex(graph.edges)
    end
    own_edges = graph.edge_ranges[id]
    union!(seen_edges, own_edges)
    push!(seen, id)

    for (child, edge_idx) in zip(children(graph, id), own_edges)
        replacement = _unalias_nodes!(graph, child, seen, seen_edges)
        # The replacement is equivalent to the old child in every way, so
        # patch the edge directly rather than triggering copies with
        # `mapchildren`.
        if child !== replacement
            graph.edges[edge_idx] = replacement
        end
    end
    return id
end
867+
868+
"""
    annotate_parent!(st::SyntaxTree)

Give each descendant of `st` a `parent::NodeId` attribute.

The tree is unaliased first (see `unalias_nodes`), so every descendant has a
single parent to record.
"""
function annotate_parent!(st::SyntaxTree)
    g = unfreeze_attrs(syntax_graph(st))
    root = unalias_nodes(SyntaxTree(g, st._id))
    ensure_attributes!(g; parent=NodeId)
    return mapchildren(syntax_graph(root), root) do child
        _annotate_parent!(child, root._id)
    end
end
877+
878+
# Recursive worker for `annotate_parent!`: record `pid` as the parent of `st`,
# then visit the children of `st` with `st` itself as their parent.
function _annotate_parent!(st::SyntaxTree, pid::NodeId)
    setattr!(st; parent=pid)
    own_id = st._id
    return mapchildren(syntax_graph(st), st) do child
        _annotate_parent!(child, own_id)
    end
end
882+
883+
"""
    prune(st::SyntaxTree; keep=flattened_provenance(st))

Return a tree where unreachable nodes (non-descendants of `st`) in its graph
have been deleted, and where provenance data has been minimized.

If `keep` is not `nothing`, descendants of `keep` are considered reachable as
well. By default, `keep` is the final node(s) in the provenance chain of `st`.
This means that, by default, expression provenance back to the original parsed
nodes is retained, but lowering-internal provenance is not. In any case, byte
(or, with old macros, `LineNumberNode`) provenance is still retained.

Provenance shrinkage: green trees are omitted from `SourceRef`s. If node A
references node B as its source and B is unreachable, A adopts the source of B.

No attributes are deleted (but that can be done separately). Possible TODO:
non-sparse attributes could be stored more compactly.
"""
function prune(st::SyntaxTree; keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    # `st` always comes first, so it is the first tree of the pruned result.
    roots = NodeId[st._id]
    if keep isa SyntaxList
        append!(roots, keep.ids)
    elseif keep isa SyntaxTree
        push!(roots, keep._id)
    end
    pruned = prune_impl(syntax_graph(st), unique(roots))
    return pruned[1]
end
905+
906+
# Copy the part of `graph1` reachable from `entrypoints` into a fresh graph,
# keeping shared nodes shared (no unaliasing is performed).
#
# `entrypoints` must not contain duplicates. Returns a `SyntaxList` over the
# new graph whose entries correspond, in order, to `entrypoints`.
function prune_a(graph1::SyntaxGraph, entrypoints::Vector{NodeId})
    @assert allunique(entrypoints)
    # Reachable nodes of graph1 in discovery order. A node's position in this
    # vector is its id in graph2.
    nodes1 = copy(entrypoints)
    # graph1 => graph2 mapping. Entries are registered as soon as a node is
    # discovered (not when it is later processed), so a node that is reached
    # several times before its turn in the queue is still enqueued only once.
    map12 = Dict{NodeId, Int}(n1 => n2 for (n2, n1) in enumerate(nodes1))
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n1 = nodes1[length(graph2.edge_ranges) + 1]
        push!(graph2.edge_ranges, is_leaf(graph1, n1) ?
            (0:-1) : (1:numchildren(graph1, n1)) .+ length(graph2.edges))
        for c1 in children(graph1, n1)
            c2 = get!(map12, c1) do
                push!(nodes1, c1)
                length(nodes1)
            end
            push!(graph2.edges, c2)
        end
    end

    # Copy every attribute except `source`, which needs remapping (below).
    for attr in attrnames(graph1)
        attr === :source && continue
        attrs1 = graph1.attributes[attr]
        attrs2 = graph2.attributes[attr]
        for (n2, n1) in enumerate(nodes1)
            attrval = get(attrs1, n1, nothing)
            isnothing(attrval) || (attrs2[n2] = attrval)
        end
    end

    # Prune provenance. A `source` may refer to a node that was not kept; in
    # that case the referring node adopts that node's (resolved) source.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    function get_resolved!(id1::NodeId)
        res = get(resolved_sources, id1, nothing)
        if isnothing(res)
            src1 = graph1.source[id1]
            res = if haskey(map12, src1)
                map12[src1]
            elseif src1 isa NodeId
                get_resolved!(src1)
            elseif src1 isa Tuple
                map(get_resolved!, src1)
            elseif src1 isa SourceRef
                # Rebuild the SourceRef with `nothing` as its last field.
                SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
            else
                src1
            end
            resolved_sources[id1] = res
        end
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end

    # The first entries of nodes1 are exactly the entrypoints.
    return SyntaxList(graph2, 1:length(entrypoints))
end
965+
966+
# Experiment: what do we gain if nodes may be assumed unaliased?
#
# Unaliasing gives back a small amount of the space saved by the DAG
# representation, but in exchange it allows us to (1) omit the whole `edges`
# array (TODO), and (2) simplify the pruning algorithm. The invariant gained is
# that `edge_ranges` forms one or more interleaved level-order traversals in
# which every node's children are contiguous, so its entries can refer to
# itself instead of to `edges`.
function prune_u(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    @assert length(entrypoints_a) === length(unique(entrypoints_a))
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    graph1 = unaliased.graph
    entrypoints = unaliased.ids
    nodes1 = collect(NodeId, entrypoints) # Reachable subset of graph1
    map12 = Dict{NodeId, Int}()           # graph1 => graph2 mapping
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        map12[n1] = n2
        # Children are appended to `nodes1` right below, so their graph2 ids
        # are the next `numchildren` positions after the current end.
        child_range = if is_leaf(graph1, n1)
            0:-1
        else
            (1:numchildren(graph1, n1)) .+ length(nodes1)
        end
        push!(graph2.edge_ranges, child_range)
        append!(nodes1, children(graph1, n1))
    end
    graph2.edges = 1:length(nodes1) # our reward for unaliasing

    # Copy every attribute except `source`, which is remapped separately.
    for attr in attrnames(graph1)
        attr === :source && continue
        for (n2, n1) in enumerate(nodes1)
            attrval = get(graph1.attributes[attr], n1, nothing)
            if !isnothing(attrval)
                graph2.attributes[attr][n2] = attrval
            end
        end
    end

    # Prune provenance. Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    function get_resolved!(id1::NodeId)
        cached = get(resolved_sources, id1, nothing)
        isnothing(cached) || return cached
        src1 = graph1.source[id1]
        resolved = if haskey(map12, src1)
            map12[src1]
        elseif src1 isa NodeId
            # Source node was not kept: adopt its (resolved) source instead.
            get_resolved!(src1)
        elseif src1 isa Tuple
            map(get_resolved!, src1)
        elseif src1 isa SourceRef
            # Rebuild the SourceRef with `nothing` as its last field.
            SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
        else
            src1
        end
        resolved_sources[id1] = resolved
        return resolved
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end

    # The first n entries in nodes1 were our entrypoints, unique from unaliasing
    return SyntaxList(graph2, 1:length(entrypoints))
end
1033+
1034+
# Implementation that `prune` dispatches to; currently the variant that
# unaliases nodes before pruning.
const prune_impl = prune_u

0 commit comments

Comments
 (0)