450450
# Render one attribute as a short text fragment for the tree printer.
# Values with a compact textual form are shown as `name=value`; anything else is
# summarized by the attribute name alone.
function attrsummary(name, value)
    return string(name)
end

function attrsummary(name, value::Number)
    return string(name, "=", value)
end

# Strings are shown quoted, with special characters escaped.
function attrsummary(name, value::String)
    return string(name, "=\"", Base.escape_string(value), "\"")
end

function attrsummary(name, value::Symbol)
    return string(name, "=", value)
end

# Only the line of a `LineNumberNode` is shown, prefixed with `L`.
function attrsummary(name, value::LineNumberNode)
    return string(name, "=L", value.line)
end
453456
454457function _value_string (ex)
455458 k = kind (ex)
@@ -490,6 +493,16 @@ function _value_string(ex)
490493 return str
491494end
492495
# Compact location summary of `ex` for tree printing: one entry per node in the
# flattened provenance of `ex`, comma separated and wrapped in square brackets.
# An entry is `L<line>` when the node's source is a `LineNumberNode`, otherwise
# `<first>:<last>` taken from the node's byte range.
function _short_locstr(ex)
    entries = String[]
    for node in flattened_provenance(ex)
        src = node.source
        if src isa LineNumberNode
            push!(entries, string("L", src.line))
        else
            span = byte_range(node)
            push!(entries, string(span.start, ":", span.stop))
        end
    end
    return string("[", join(entries, ","), "]")
end
505+
493506function _show_syntax_tree (io, ex, indent, show_kinds)
494507 val = get (ex, :value , nothing )
495508 nodestr = ! is_leaf (ex) ? " [$(untokenize (head (ex))) ]" : _value_string (ex)
@@ -499,9 +512,10 @@ function _show_syntax_tree(io, ex, indent, show_kinds)
499512 treestr = treestr* " :: " * string (kind (ex))
500513 end
501514
502- std_attrs = Set ([:name_val , :value , : kind ,:syntax_flags , : source, :var_id ])
515+ std_attrs = Set ([:kind , : source ])
503516 attrstr = join ([attrsummary (n, getproperty (ex, n))
504517 for n in attrnames (ex) if n ∉ std_attrs], " ," )
518+ attrstr = " $(_short_locstr (ex)) ($(ex. _id) ) $attrstr "
505519 treestr = string (rpad (treestr, 60 ), " │ $attrstr " )
506520
507521 println (io, treestr)
@@ -798,3 +812,223 @@ end
798812# out
799813# end
800814
815+ # -------------------------------------------------------------------------------
816+ # Data structure utilities
817+
"""
    unalias_nodes(st::SyntaxTree)

Return a tree in which every descendant of `st` has exactly one parent. The
result is the same tree as `st` in everything but its underlying representation:
each additional parent of a shared subtree receives its own copy of that
subtree. Apart from these copies, `unalias_nodes` should not allocate new nodes.

The graph of `st` is reused and updated in place (child edges are redirected to
the copies); the returned tree lives in that same graph.

    unalias_nodes(sl::SyntaxList)

When a `SyntaxList` is given, the resulting trees are unique with respect to
each other as well as internally. An id that occurs more than once produces a
copied tree.
"""
function unalias_nodes(st::SyntaxTree)
    graph = syntax_graph(st)
    root = _unalias_nodes!(graph, st._id, Set{NodeId}(), Set{Int}())
    return SyntaxTree(graph, root)
end

function unalias_nodes(sl::SyntaxList)
    graph = syntax_graph(sl)
    # Shared across all entries so that the trees are also unaliased from each other
    visited = Set{NodeId}()
    visited_edges = Set{Int}()
    roots = [_unalias_nodes!(graph, id, visited, visited_edges) for id in sl.ids]
    return SyntaxList(graph, roots)
end
841+
# Recursive worker for `unalias_nodes`. Returns the id to use in place of `id`:
# `id` itself on the first visit, or the root of a fresh copy if `id` was already
# visited. `seen` collects visited node ids and `seen_edges` the indices into
# `graph.edges` that visited nodes own.
#
# Note that `seen_edges` is only needed for when edge ranges overlap, which is a
# situation we don't produce yet.
function _unalias_nodes!(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    if id in seen
        # Everything `copy_ast` creates is fresh, unreferenced and mutually
        # distinct, so the copy needs no further recursion.
        duplicate = copy_ast(graph, SyntaxTree(graph, id); copy_source=false)
        return duplicate._id
    end

    if any(in(seen_edges), graph.edge_ranges[id])
        # Another node refers to (some of) our edge slots. Move our child list to
        # the end of `graph.edges` so it can be modified without affecting them.
        first_new = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = first_new:lastindex(graph.edges)
    end
    own_edges = graph.edge_ranges[id]
    union!(seen_edges, own_edges)
    push!(seen, id)

    for (child, slot) in zip(children(graph, id), own_edges)
        replacement = _unalias_nodes!(graph, child, seen, seen_edges)
        # The replacement is the same as the old child in every way, so rewrite
        # the edge directly instead of triggering copies with `mapchildren`.
        if child !== replacement
            graph.edges[slot] = replacement
        end
    end
    return id
end
867+
"""
    annotate_parent!(st::SyntaxTree)

Give each descendant of `st` a `parent::NodeId` attribute.

The tree is unaliased first (see `unalias_nodes`), on a graph whose attributes
have been unfrozen. The root itself is not given a `parent`; its children get
the root's id. Returns the result of `mapchildren` over the unaliased root.
"""
function annotate_parent!(st::SyntaxTree)
    graph = unfreeze_attrs(syntax_graph(st))
    root = unalias_nodes(SyntaxTree(graph, st._id))
    ensure_attributes!(graph; parent=NodeId)
    root_id = root._id
    return mapchildren(syntax_graph(root), root) do child
        _annotate_parent!(child, root_id)
    end
end
877+
# Record `pid` as the `parent` attribute of `st`, then do the same for every
# child of `st` with `st`'s own id as the parent.
function _annotate_parent!(st::SyntaxTree, pid::NodeId)
    setattr!(st; parent=pid)
    own_id = st._id
    return mapchildren(syntax_graph(st), st) do child
        _annotate_parent!(child, own_id)
    end
end
882+
"""
    prune(st::SyntaxTree; keep=flattened_provenance(st))

Return a copy of `st` in a new graph from which unreachable nodes
(non-descendants of `st`) have been dropped and in which provenance data has
been minimized.

If `keep` is not `nothing`, its descendants count as reachable too. By default
`keep` is the final node(s) in the provenance chain of `st`, so expression
provenance back to the original parsed nodes survives, but lowering-internal
provenance does not. Byte (or, with old macros, `LineNumberNode`) provenance is
retained in any case.

Provenance shrinkage: green trees are omitted from `SourceRef`s. If node A
references node B as its source and B is unreachable, A adopts the source of B.

No attributes are deleted (that can be done separately). Possible TODO:
non-sparse attributes could be stored more compactly.
"""
function prune(st::SyntaxTree;
               keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    roots = NodeId[st._id]
    if keep isa SyntaxList
        append!(roots, keep.ids)
    elseif keep isa SyntaxTree
        push!(roots, keep._id)
    end
    # `st` stays first, so it is the first entry of the pruned list
    unique!(roots)
    pruned = prune_impl(syntax_graph(st), roots)
    return pruned[1]
end
905+
# Pruning that keeps the DAG structure: copy the part of `graph1` reachable from
# `entrypoints` into a fresh graph, with shared nodes staying shared.
#
# Returns a `SyntaxList` over the new graph whose i-th entry corresponds to
# `entrypoints[i]`. `entrypoints` must not contain duplicates.
function prune_a(graph1::SyntaxGraph, entrypoints::Vector{NodeId})
    @assert allunique(entrypoints)
    # Breadth-first worklist of graph1 ids. A node's position in `nodes1` is its
    # id in graph2.
    nodes1 = copy(entrypoints)
    # graph1 id => graph2 id. Entries are added when a node is *queued* rather
    # than when it is visited; otherwise a node reached again before its turn
    # (an aliased child, or a child that is also a pending entrypoint) would be
    # queued, and therefore copied, more than once.
    map12 = Dict{NodeId, Int}(n1 => n2 for (n2, n1) in enumerate(nodes1))
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n1 = nodes1[length(graph2.edge_ranges) + 1]
        push!(graph2.edge_ranges, is_leaf(graph1, n1) ?
            (0:-1) : (1:numchildren(graph1, n1)) .+ length(graph2.edges))
        for c1 in children(graph1, n1)
            c2 = get!(map12, c1) do
                push!(nodes1, c1)
                length(nodes1)
            end
            push!(graph2.edges, c2)
        end
    end

    # Copy every attribute except `source`, which needs remapping (below).
    for attr in attrnames(graph1)
        attr === :source && continue
        for (n2, n1) in enumerate(nodes1)
            attrval = get(graph1.attributes[attr], n1, nothing)
            if !isnothing(attrval)
                graph2.attributes[attr][n2] = attrval
            end
        end
    end

    # Prune provenance. A `source` may point at a node that was not kept; such
    # references are replaced by the (recursively resolved) source of that node.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 id => graph2 source
    function get_resolved!(id1::NodeId)
        res = get(resolved_sources, id1, nothing)
        if isnothing(res)
            src1 = graph1.source[id1]
            res = if haskey(map12, src1)
                # source node was kept: point at its new id
                map12[src1]
            elseif src1 isa NodeId
                # source node was dropped: adopt its source
                get_resolved!(src1)
            elseif src1 isa Tuple
                # NOTE(review): tuple elements are always resolved through their
                # own `source`, even when the element itself was kept (is a key
                # of `map12`) - confirm this is intended.
                map(get_resolved!, src1)
            elseif src1 isa SourceRef
                # rebuild the SourceRef with `nothing` as its last field
                SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
            else
                src1
            end
            resolved_sources[id1] = res
        end
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end
    # The entrypoints were queued first, so they occupy the first ids of graph2
    return SyntaxList(graph2, 1:length(entrypoints))
end
965+
# Experiment: What can we gain if we are allowed to assume nodes are unaliased?
#
# Unaliasing gives back a little of the space the DAG representation saves, but
# it allows us to (1) omit the whole `edges` array (TODO), and (2) make the
# pruning algorithm simpler. The invariant we win is having `edge_ranges` be
# one or more interleaved level-order traversals where every node's set of
# children is contiguous, so its entries can refer to itself instead of `edges`.
#
# Returns a `SyntaxList` over a fresh graph whose i-th entry corresponds to
# `entrypoints_a[i]`. Note that `graph1_a` is passed through `unalias_nodes`
# first.
function prune_u(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    @assert length(entrypoints_a) === length(unique(entrypoints_a))
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    graph1 = unaliased.graph
    entrypoints = unaliased.ids
    # Reachable subset of graph1, in visiting order; position == graph2 id
    nodes1 = collect(NodeId, entrypoints)
    # graph1 => graph2 mapping
    map12 = Dict{NodeId, Int}()
    graph2 = ensure_attributes!(SyntaxGraph(); attrtypes(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        map12[n1] = n2
        # Children are queued right behind the current end of `nodes1`, so
        # their graph2 ids form one contiguous range.
        child_ids = if is_leaf(graph1, n1)
            0:-1
        else
            (1:numchildren(graph1, n1)) .+ length(nodes1)
        end
        push!(graph2.edge_ranges, child_ids)
        append!(nodes1, children(graph1, n1))
    end
    graph2.edges = 1:length(nodes1) # our reward for unaliasing

    # Copy every attribute except `source`, which needs remapping (below).
    for attr in attrnames(graph1)
        attr === :source && continue
        for (n2, n1) in enumerate(nodes1)
            val = get(graph1.attributes[attr], n1, nothing)
            isnothing(val) && continue
            graph2.attributes[attr][n2] = val
        end
    end

    # Prune provenance. Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 => graph2
    function resolve_source!(id1::NodeId)
        cached = get(resolved_sources, id1, nothing)
        isnothing(cached) || return cached
        src1 = graph1.source[id1]
        resolved =
            if haskey(map12, src1)
                map12[src1]
            elseif src1 isa NodeId
                resolve_source!(src1)
            elseif src1 isa Tuple
                map(resolve_source!, src1)
            elseif src1 isa SourceRef
                SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
            else
                src1
            end
        resolved_sources[id1] = resolved
        return resolved
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = resolve_source!(n1)
    end

    # The first n entries in nodes1 were our entrypoints, unique from unaliasing
    return SyntaxList(graph2, 1:length(entrypoints))
end
1033+
# Implementation that `prune` dispatches to. `prune_u` unaliases the graph
# first; `prune_a` is the alternative that works on the graph as given.
const prune_impl = prune_u
0 commit comments