Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 146 additions & 3 deletions src/syntax_graph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,17 @@ end
# Reference to a contiguous range of bytes within a source file.
# NOTE(review): this span comes from a rendered diff, so the pre-change
# `green_tree` field (with its TODO) and the post-change `last_byte` /
# `green_tree` fields both appear below — confirm which lines are live.
struct SourceRef
# File that the byte offsets below index into (returned by `sourcefile`)
file::SourceFile
# First byte of the referenced range (lower bound of `byte_range`)
first_byte::Int
# TODO: Do we need the green node, or would last_byte suffice?
green_tree::JuliaSyntax.GreenNode
# Last byte of the referenced range, inclusive (upper bound of `byte_range`)
last_byte::Int
# TODO: `nothing` is only used when we `prune` a tree, but we may not need
# this field at all.
green_tree::Union{Nothing, JuliaSyntax.GreenNode}
end

# Convenience constructor: derive `last_byte` from the span of `green_tree`,
# so that the range is `first_byte:(first_byte + span(green_tree) - 1)`.
#
# Only a real green node is accepted here. The span cannot be computed from
# `nothing`, so a `SourceRef` without a green tree must be built with the
# four-argument constructor and an explicit `last_byte`.
SourceRef(file::SourceFile, first_byte::Int, green_tree::JuliaSyntax.GreenNode) =
    SourceRef(file, first_byte, first_byte + span(green_tree) - 1, green_tree)

# Source file that `src` points into.
JuliaSyntax.sourcefile(src::SourceRef) = src.file
# Inclusive byte range covered by `src`.
# NOTE(review): this span comes from a rendered diff, so two definitions are
# shown. The first derives the end of the range from the green tree's span; the
# second reads the stored `last_byte`.
JuliaSyntax.byte_range(src::SourceRef) = src.first_byte:(src.first_byte + span(src.green_tree) - 1)
JuliaSyntax.byte_range(src::SourceRef) = src.first_byte:src.last_byte

# TODO: Adding these methods to support LineNumberNode is kind of hacky but we
# can remove these after JuliaLowering becomes self-bootstrapping for macros
Expand Down Expand Up @@ -817,3 +822,141 @@ end
# end
# out
# end

#-------------------------------------------------------------------------------
# Data structure utilities

"""
unalias_nodes(st::SyntaxTree)

Return a tree where each descendent of `st` has exactly one parent in `st`. The
returned tree is identical to `st` in all but underlying representation, where
every additional parent to a subtree generates a copy of that subtree. Apart
from this, `unalias_nodes` should not allocate new nodes unnecessarily.

unalias_nodes(sl::SyntaxList)

If a `SyntaxList` is given, every resulting tree will be unique with respect to
each other as well as internally. A duplicate entry will produce a copied tree.
"""
function unalias_nodes(st::SyntaxTree)
    graph = syntax_graph(st)
    # Start from empty bookkeeping: only aliasing inside this one tree matters
    root = _unalias_nodes(graph, st._id, Set{NodeId}(), Set{Int}())
    return SyntaxTree(graph, root)
end

function unalias_nodes(sl::SyntaxList)
    graph = syntax_graph(sl)
    # The bookkeeping sets are shared by all entries, so a node reached from two
    # different entries (or a repeated entry) is copied as well.
    visited = Set{NodeId}()
    visited_edges = Set{Int}()
    new_ids = [_unalias_nodes(graph, id, visited, visited_edges) for id in sl.ids]
    return SyntaxList(graph, new_ids)
end

# Note that `seen_edges` is only needed for when edge ranges overlap, which is a
# situation we don't produce yet.
# Recursive worker for `unalias_nodes`. Returns the id to use in place of `id`:
# `id` itself on its first visit, or the root of a copy if it was already seen.
# `seen` and `seen_edges` record the node ids and `graph.edges` indices claimed
# so far; both are mutated, as is `graph`.
function _unalias_nodes(graph::SyntaxGraph, id::NodeId, seen::Set{NodeId}, seen_edges::Set{Int})
    if id in seen
        # Reached through a second parent: give that parent its own copy
        id = copy_ast(graph, SyntaxTree(graph, id); copy_source=false)._id
    end
    if !isdisjoint(seen_edges, graph.edge_ranges[id])
        # Some other node already claimed (part of) our edge slots. Move our
        # child list to fresh slots at the end of `edges` so that rewriting it
        # below cannot affect that node.
        first_new = length(graph.edges) + 1
        append!(graph.edges, children(graph, id))
        graph.edge_ranges[id] = first_new:lastindex(graph.edges)
    end
    own_edges = graph.edge_ranges[id]
    union!(seen_edges, own_edges)
    push!(seen, id)

    for (child, slot) in zip(children(graph, id), own_edges)
        unaliased = _unalias_nodes(graph, child, seen, seen_edges)
        # A replacement child is a copy of the old one, so patch the edge in
        # place instead of triggering further copies with `mapchildren`
        if child !== unaliased
            graph.edges[slot] = unaliased
        end
    end
    return id
end

"""
Return a tree where unreachable nodes (non-descendents of `st`) in its graph
have been deleted, and where provenance data has been minimized.

If `keep` is not nothing, also consider descendents of it reachable. By
default, `keep` is the final node(s) in the provenance chain of `st`. This
means that, by default, we have expression provenance back to the original
parsed nodes, but no lowering-internal provenance. In any case, we still retain
byte (or, with old macros, LineNumberNode) provenance.

Provenance shrinkage: Green trees are omitted from SourceRefs. If node A
references node B as its source and B is unreachable, A adopts the source of B.
"""
function prune(st::SyntaxTree; keep::Union{SyntaxTree, SyntaxList, Nothing}=flattened_provenance(st))
    # `st` goes first so that its pruned counterpart is the first result below
    roots = NodeId[st._id]
    if keep isa SyntaxList
        append!(roots, keep.ids)
    elseif keep isa SyntaxTree
        push!(roots, keep._id)
    end
    # `keep` may repeat `st` or contain duplicates; the graph-level method
    # requires unique entrypoints
    pruned = prune(syntax_graph(st), unique(roots))
    return pruned[1]
end

# This implementation unaliases nodes, which undoes a small amount of space
# savings from the DAG representation, but it allows us to (1) omit the whole
# `edges` array (TODO), and (2) make the pruning algorithm simpler. The
# invariant we win is having `edge_ranges` be one or more interleaved
# level-order traversals where every node's set of children is contiguous, so
# its entries can refer to itself instead of an external `edges` vector.
# Graph-level worker for `prune`. Builds a fresh graph holding only the nodes
# reachable from `entrypoints_a` (which must be unique) and returns a
# `SyntaxList` over that graph with one tree per entrypoint, in the same order.
function prune(graph1_a::SyntaxGraph, entrypoints_a::Vector{NodeId})
    @assert allunique(entrypoints_a)
    unaliased = unalias_nodes(SyntaxList(graph1_a, entrypoints_a))
    (graph1, entrypoints) = (unaliased.graph, unaliased.ids)
    # Reachable subset of graph1, in the order nodes are numbered in graph2.
    # This is a copy: it doubles as the work queue and grows in the loop below.
    nodes1 = collect(NodeId, entrypoints)
    map12 = Dict{NodeId, Int}()             # graph1 => graph2 mapping
    graph2 = ensure_attributes!(SyntaxGraph(); attrdefs(graph1)...)
    while length(graph2.edge_ranges) < length(nodes1)
        n2 = length(graph2.edge_ranges) + 1
        n1 = nodes1[n2]
        map12[n1] = n2
        # Children are queued directly after everything queued so far, so in
        # graph2 they occupy one contiguous run of node ids.
        n_queued = length(nodes1)
        child_range = is_leaf(graph1, n1) ?
            (0:-1) : ((n_queued + 1):(n_queued + numchildren(graph1, n1)))
        push!(graph2.edge_ranges, child_range)
        append!(nodes1, children(graph1, n1))
    end
    graph2.edges = 1:length(nodes1) # our reward for unaliasing

    # Copy every attribute except provenance, which needs the remapping below
    for attr in attrnames(graph1)
        attr === :source && continue
        attrs1 = graph1.attributes[attr]
        attrs2 = graph2.attributes[attr]
        for (n2, n1) in enumerate(nodes1)
            attrval = get(attrs1, n1, nothing)
            isnothing(attrval) || (attrs2[n2] = attrval)
        end
    end

    # Prune provenance.  Tricky due to dangling `.source` references.
    resolved_sources = Dict{NodeId, SourceAttrType}() # graph1 id => graph2 source

    # Translate one provenance value of graph1 into graph2 terms:
    # - a reachable node becomes its graph2 id,
    # - an unreachable node is replaced by its own (resolved) source,
    # - a tuple is translated element by element with these same rules, so a
    #   reachable element keeps pointing at its node instead of being skipped,
    # - a `SourceRef` is kept without its green tree,
    # - anything else (e.g. `LineNumberNode`) passes through unchanged.
    function resolve_source(src1)
        if src1 isa NodeId
            haskey(map12, src1) ? map12[src1] : get_resolved!(src1)
        elseif src1 isa Tuple
            map(resolve_source, src1)
        elseif src1 isa SourceRef
            SourceRef(src1.file, src1.first_byte, src1.last_byte, nothing)
        else
            src1
        end
    end

    # Memoized graph2 source for the graph1 node `id1`
    function get_resolved!(id1::NodeId)
        res = get(resolved_sources, id1, nothing)
        if isnothing(res)
            res = resolve_source(graph1.source[id1])
            resolved_sources[id1] = res
        end
        return res
    end

    for (n2, n1) in enumerate(nodes1)
        graph2.source[n2] = get_resolved!(n1)
    end

    # The first n entries in nodes1 were our entrypoints, unique from unaliasing
    return SyntaxList(graph2, 1:length(entrypoints))
end
4 changes: 2 additions & 2 deletions test/functions_ir.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1532,7 +1532,7 @@ end
18 (call core.svec %₁₅ %₁₆ %₁₇)
19 --- method core.nothing %₁₈
slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)]
1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68]), 1, (macrocall (macro_name 1-1::@-t 2-10::Identifier) 11-11::Whitespace-t (function 12-19::function-t 20-20::Whitespace-t (call 21-36::Identifier 37-37::(-t 38-38::Identifier 39-39::,-t 40-40::Whitespace-t 41-41::Identifier 42-42::)-t) (block 43-47::NewlineWs-t (call 48-61::Identifier 62-62::(-t 63-63::Identifier 64-64::,-t 65-65::Identifier 66-66::)-t) 67-67::NewlineWs-t) 68-70::end-t))) (call core.svec :#self# :x :y) (call core.svec)))
1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68]), 1, 70, (macrocall (macro_name 1-1::@-t 2-10::Identifier) 11-11::Whitespace-t (function 12-19::function-t 20-20::Whitespace-t (call 21-36::Identifier 37-37::(-t 38-38::Identifier 39-39::,-t 40-40::Whitespace-t 41-41::Identifier 42-42::)-t) (block 43-47::NewlineWs-t (call 48-61::Identifier 62-62::(-t 63-63::Identifier 64-64::,-t 65-65::Identifier 66-66::)-t) 67-67::NewlineWs-t) 68-70::end-t))) (call core.svec :#self# :x :y) (call core.svec)))
2 (meta :generated_only)
3 (return core.nothing)
20 latestworld
Expand Down Expand Up @@ -1578,7 +1578,7 @@ end
18 (call core.svec %₁₅ %₁₆ %₁₇)
19 --- method core.nothing %₁₈
slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff]
1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, (function 1-8::function-t 9-9::Whitespace-t (call 10-30::Identifier 31-31::(-t 32-32::Identifier 33-33::,-t 34-34::Whitespace-t 35-35::Identifier 36-36::)-t) (block 37-41::NewlineWs-t (= 42-53::Identifier 54-54::Whitespace-t 55-55::=-t 56-56::Whitespace-t (call 57-63::Identifier 64-64::(-t 65-65::Identifier 66-66::,-t 67-67::Whitespace-t 68-68::Identifier 69-69::)-t)) 70-74::NewlineWs-t (if 75-76::if-t 77-77::Whitespace-t (macrocall (macro_name 78-78::@-t 79-87::Identifier)) (block 88-96::NewlineWs-t (quote (block 97-101::quote-t 102-114::NewlineWs-t (= 115-129::Identifier 130-130::Whitespace-t 131-131::=-t 132-132::Whitespace-t (call 133-146::Identifier 147-147::(-t 148-148::Identifier 149-149::,-t 150-150::Whitespace-t 151-151::Identifier 152-152::)-t)) 153-161::NewlineWs-t 162-164::end-t)) 165-169::NewlineWs-t) 170-173::else-t (block 174-182::NewlineWs-t (= 183-197::Identifier 198-198::Whitespace-t 199-199::=-t 200-200::Whitespace-t (call 201-217::Identifier 218-218::(-t 219-219::Identifier 220-220::,-t 221-221::Whitespace-t 222-222::Identifier 223-223::)-t)) 224-228::NewlineWs-t) 229-231::end-t) 232-236::NewlineWs-t (tuple-p 237-237::(-t 238-249::Identifier 250-250::,-t 251-251::Whitespace-t 252-266::Identifier 267-267::)-t) 268-268::NewlineWs-t) 269-271::end-t)) (call core.svec :#self# :x :y) (call core.svec)))
1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, 271, (function 1-8::function-t 9-9::Whitespace-t (call 10-30::Identifier 31-31::(-t 32-32::Identifier 33-33::,-t 34-34::Whitespace-t 35-35::Identifier 36-36::)-t) (block 37-41::NewlineWs-t (= 42-53::Identifier 54-54::Whitespace-t 55-55::=-t 56-56::Whitespace-t (call 57-63::Identifier 64-64::(-t 65-65::Identifier 66-66::,-t 67-67::Whitespace-t 68-68::Identifier 69-69::)-t)) 70-74::NewlineWs-t (if 75-76::if-t 77-77::Whitespace-t (macrocall (macro_name 78-78::@-t 79-87::Identifier)) (block 88-96::NewlineWs-t (quote (block 97-101::quote-t 102-114::NewlineWs-t (= 115-129::Identifier 130-130::Whitespace-t 131-131::=-t 132-132::Whitespace-t (call 133-146::Identifier 147-147::(-t 148-148::Identifier 149-149::,-t 150-150::Whitespace-t 151-151::Identifier 152-152::)-t)) 153-161::NewlineWs-t 162-164::end-t)) 165-169::NewlineWs-t) 170-173::else-t (block 174-182::NewlineWs-t (= 183-197::Identifier 198-198::Whitespace-t 199-199::=-t 200-200::Whitespace-t (call 201-217::Identifier 218-218::(-t 219-219::Identifier 220-220::,-t 221-221::Whitespace-t 222-222::Identifier 223-223::)-t)) 224-228::NewlineWs-t) 229-231::end-t) 232-236::NewlineWs-t (tuple-p 237-237::(-t 238-249::Identifier 250-250::,-t 251-251::Whitespace-t 252-266::Identifier 267-267::)-t) 268-268::NewlineWs-t) 269-271::end-t)) (call core.svec :#self# :x :y) (call core.svec)))
2 TestMod.bothgen
3 (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y))
4 TestMod.some_nongen_stuff
Expand Down
175 changes: 174 additions & 1 deletion test/syntax_graph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ end
# Test helper: build a `SyntaxGraph` directly from raw `edge_ranges` and `edges`.
# Every node `i` gets kind `K"block"`, source `LineNumberNode(i)`, and an `orig`
# attribute equal to its own index, which the tests use to trace copies back to
# the node they came from. `more_attrs` are extra `name => Dict` pairs passed to
# the attribute `Dict` after the defaults (presumably overriding a default with
# the same name, as the `:source` override in the "prune" tests relies on).
# NOTE(review): this span comes from a rendered diff, so the pre-change
# `Dict(...)` argument line and its two-line replacement both appear below.
function testgraph(edge_ranges, edges, more_attrs...)
kinds = Dict(map(i->(i=>K"block"), eachindex(edge_ranges)))
sources = Dict(map(i->(i=>LineNumberNode(i)), eachindex(edge_ranges)))
orig = Dict(map(i->(i=>i), eachindex(edge_ranges)))
SyntaxGraph(
edge_ranges,
edges,
Dict(:kind => kinds, :source => sources, more_attrs...))
Dict(:kind => kinds, :source => sources,
:orig => orig, more_attrs...))
end

@testset "copy_ast" begin
Expand Down Expand Up @@ -107,4 +109,175 @@ end
# Disallow for now, since we can't prevent dangling sourcerefs
@test_throws ErrorException JuliaLowering.copy_ast(new_g, st; copy_source=false)
end

@testset "unalias_nodes" begin
    # Node 4 has two parents and must end up as two separate nodes.
    # 1 -+-> 2 ->-> 4
    #    |      |
    #    +-> 3 -+
    g = testgraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 5
    @test length(stu._graph.edges) == 4
    # Properties of node 4 should be preserved
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test st[1][1].source == stu[1][1].source == stu[2][1].source
    @test stu[1][1]._id != stu[2][1]._id

    # Try again with overlapping edge_ranges (nodes 2 and 3 share one edge slot)
    g = testgraph([1:2, 3:3, 3:3, 0:-1], [2, 3, 4])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 5
    @test length(stu._graph.edges) == 4
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test st[1][1].source == stu[1][1].source == stu[2][1].source
    @test stu[1][1]._id != stu[2][1]._id

    #        +-> 5
    #        |
    # 1 -+-> 2 -+---->>>-> 6
    #    |           |||
    #    +-> 3 -> 7 -+||
    #    |            ||
    #    +-> 4 -+-----+|
    #           |      |
    #           +------+
    g = testgraph([1:3, 4:5, 6:6, 7:8, 0:-1, 0:-1, 9:9],
                  [2, 3, 4, 5, 6, 7, 6, 6, 6])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    # node 6 should be copied three times
    @test length(stu._graph.edge_ranges) == 10
    @test length(stu._graph.edges) == 9
    # the four copies of node 6 should have attrs identical to the original and distinct ids
    @test 6 == stu[1][2].orig == stu[2][1][1].orig == stu[3][1].orig == stu[3][2].orig
    # A chained `a != b != c` only compares neighbours, so check all pairs
    @test allunique([stu[1][2]._id, stu[2][1][1]._id, stu[3][1]._id, stu[3][2]._id])

    # 1 -+-> 2 ->-> 4 -+----> 5 ->-> 7
    #    |      |      |         |
    #    +-> 3 -+      +-->-> 6 -+
    #        |            |
    #        +------------+
    g = testgraph([1:2, 3:3, 4:5, 6:7, 8:8, 9:9, 0:-1],
                  [2, 3, 4, 4, 6, 5, 6, 7, 7])
    st = SyntaxTree(g, 1)
    stu = JuliaLowering.unalias_nodes(st)
    @test st ≈ stu
    @test length(stu._graph.edge_ranges) == 15
    @test length(stu._graph.edges) == 14
    # attrs of nodes 4-7
    @test 4 == stu[1][1].orig == stu[2][1].orig
    @test 5 == stu[1][1][1].orig == stu[2][1][1].orig
    @test 6 == stu[1][1][2].orig == stu[2][1][2].orig == stu[2][2].orig
    @test 7 == stu[1][1][1][1].orig == stu[1][1][2][1].orig ==
        stu[2][1][1][1].orig == stu[2][1][2][1].orig == stu[2][2][1].orig
    # ensure no duplication: all five copies of node 7 are pairwise distinct
    @test allunique([stu[1][1][1][1]._id, stu[1][1][2][1]._id,
                     stu[2][1][1][1]._id, stu[2][1][2][1]._id, stu[2][2][1]._id])
end

@testset "prune" begin
    # Number of nodes in the graph backing `tree`
    node_count(tree) = length(syntax_graph(tree).edge_ranges)

    # Only the subtree under the entrypoint [1] should survive.
    # [1]-+-> 2         5 --> 6
    #     |
    #     +-> 3 --> 4   7
    graph = testgraph([1:2, 0:-1, 3:3, 0:-1, 4:4, 0:-1, 0:-1], [2, 3, 4, 6])
    tree = SyntaxTree(graph, 1)
    pruned = JuliaLowering.prune(tree)
    @test tree ≈ pruned
    @test node_count(pruned) === 4
    @test pruned.source == LineNumberNode(1)
    @test pruned[1].source == LineNumberNode(2)
    @test pruned[2].source == LineNumberNode(3)
    @test pruned[2][1].source == LineNumberNode(4)

    # Same graph, isolated entrypoint
    # (also checks that the last prune didn't destroy the graph)
    #  1 -+-> 2         5 --> 6
    #     |
    #     +-> 3 --> 4  [7]
    tree = SyntaxTree(graph, 7)
    pruned = JuliaLowering.prune(tree)
    @test tree ≈ pruned
    @test node_count(pruned) === 1
    @test pruned.orig == 7

    # Entrypoint below an aliased node
    # 1 -+->[2]->-> 4
    #    |      |
    #    +-> 3 -+
    graph = testgraph([1:2, 3:3, 4:4, 0:-1], [2, 3, 4, 4])
    tree = SyntaxTree(graph, 2)
    pruned = JuliaLowering.prune(tree)
    @test tree ≈ pruned
    @test node_count(pruned) === 2
    @test pruned.orig == 2
    @test pruned[1].orig == 4

    # Provenance chain through nodes that are otherwise unreachable
    #  9 -->[1]--> 5      src(1) = 2
    # 10 --> 2 --> 6      src(2) = 3
    # 11 --> 3 --> 7      src(3) = 4
    # 12 --> 4 --> 8      else src(i) = line(i)
    line_sources = map(i->(i=>LineNumberNode(i)), 4:12)
    graph = testgraph([1:1, 2:2, 3:3, 4:4, 0:-1, 0:-1, 0:-1, 0:-1, 5:5, 6:6, 7:7, 8:8],
                      [5, 6, 7, 8, 1, 2, 3, 4],
                      :source => Dict(1=>2, 2=>3, 3=>4, line_sources...))
    tree = SyntaxTree(graph, 1)
    pruned = JuliaLowering.prune(tree)
    @test tree ≈ pruned
    # 1, 5, 4, 8 should remain
    @test node_count(pruned) === 4
    @test pruned.source isa NodeId
    orig_4 = SyntaxTree(syntax_graph(pruned), pruned.source)
    @test orig_4.source === LineNumberNode(4)
    @test numchildren(orig_4) === 1
    @test orig_4[1].source === LineNumberNode(8)
    @test pruned[1].source === LineNumberNode(5)

    # Try again with node 3 explicitly marked reachable
    kept = JuliaLowering.SyntaxList(graph, NodeId[3, 4])
    pruned = JuliaLowering.prune(tree, keep=kept)
    @test tree ≈ pruned
    # 1, 5, 4, 8, and now 3, 7 as well
    @test node_count(pruned) === 6
    @test pruned.source isa NodeId
    @test pruned[1].source === LineNumberNode(5)

    orig_3 = SyntaxTree(syntax_graph(pruned), pruned.source)
    @test orig_3.source isa NodeId
    orig_4 = SyntaxTree(syntax_graph(pruned), orig_3.source)
    @test orig_4.source === LineNumberNode(4)

    @test numchildren(orig_3) === 1
    @test numchildren(orig_4) === 1
    @test orig_3[1].source === LineNumberNode(7)
    @test orig_4[1].source === LineNumberNode(8)

    # Try again with no node provenance
    pruned = JuliaLowering.prune(tree, keep=nothing)
    @test tree ≈ pruned
    @test node_count(pruned) === 2
    @test pruned.source === LineNumberNode(4)
    @test pruned[1].source === LineNumberNode(5)

    # "real world" test with lowered output---not many properties we can
    # check without fragile tests, but there are some.
    test_mod = Module()
    code = "begin; x1=1; x2=2; x3=3; x4=begin; 4; end; begin; end; end"
    parsed = parsestmt(SyntaxTree, code)
    lowered = JuliaLowering.lower(test_mod, parsed)
    pruned = JuliaLowering.prune(lowered)
    @test lowered ≈ pruned
    @test node_count(pruned) < node_count(lowered)
    @test pruned.source isa NodeId
    @test SyntaxTree(syntax_graph(pruned), pruned.source) ≈ parsed
    @test sourcetext(pruned) == code
    # try without preserving the parsed tree
    pruned = JuliaLowering.prune(lowered, keep=nothing)
    @test lowered ≈ pruned
    @test node_count(pruned) < node_count(lowered)
    @test pruned.source isa SourceRef
    @test sourcetext(pruned) == code
end
end
Loading