Skip to content

Commit 8dbfaee

Browse files
authored
Merge pull request #316 from JuliaGPU/tb/simplify_cloning
Simplify IR cloning.
2 parents 20e21e8 + 790324f commit 8dbfaee

File tree

3 files changed

+11
-108
lines changed

3 files changed

+11
-108
lines changed

src/irgen.jl

Lines changed: 7 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -492,54 +492,14 @@ function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.
492492
end
493493
end
494494

495-
# inline the old IR
495+
# map the arguments
496496
value_map = Dict{LLVM.Value, LLVM.Value}(
497497
param => new_args[i] for (i,param) in enumerate(parameters(f))
498498
)
499499

500-
# before D96531 (part of LLVM 13), clone_into! wants to duplicate debug metadata
501-
# when the functions are part of the same module. that is invalid, because it
502-
# results in desynchronized debug intrinsics (GPUCompiler#284), so remove those.
503-
if LLVM.version() < v"13"
504-
removals = LLVM.Instruction[]
505-
for bb in blocks(f), inst in instructions(bb)
506-
if inst isa LLVM.CallInst && LLVM.name(called_value(inst)) == "llvm.dbg.declare"
507-
push!(removals, inst)
508-
end
509-
end
510-
for inst in removals
511-
@assert isempty(uses(inst))
512-
unsafe_delete!(LLVM.parent(inst), inst)
513-
end
514-
changes = LLVM.API.LLVMCloneFunctionChangeTypeGlobalChanges
515-
else
516-
changes = LLVM.API.LLVMCloneFunctionChangeTypeLocalChangesOnly
517-
end
518-
519-
# use a value materializer for replacing uses of the function in constants
520-
# NOTE: we assume kernel functions can't be called. on-device kernel launches,
521-
# e.g. CUDA's dynamic parallelism, will pass the function to an API instead,
522-
# and we update those constant expressions arguments here.
523-
function materializer(val)
524-
opcodes = (LLVM.API.LLVMPtrToInt, LLVM.API.LLVMAddrSpaceCast, LLVM.API.LLVMBitCast)
525-
if val isa LLVM.ConstantExpr && opcode(val) in opcodes
526-
target = operands(val)[1]
527-
if target == f
528-
return if opcode(val) == LLVM.API.LLVMPtrToInt
529-
LLVM.const_ptrtoint(new_f, llvmtype(val))
530-
elseif opcode(val) == LLVM.API.LLVMAddrSpaceCast
531-
LLVM.const_addrspacecast(new_f, llvmtype(val))
532-
elseif opcode(val) == LLVM.API.LLVMBitCast
533-
LLVM.const_bitcast(new_f, llvmtype(val))
534-
end
535-
end
536-
end
537-
return val
538-
end
539-
540-
# we don't want module-level changes, because otherwise LLVM will clone metadata,
541-
# resulting in mismatching references between `!dbg` metadata and `dbg` instructions
542-
clone_into!(new_f, f; value_map, changes, materializer)
500+
value_map[f] = new_f
501+
clone_into!(new_f, f; value_map,
502+
changes=LLVM.API.LLVMCloneFunctionChangeTypeGlobalChanges)
543503

544504
# fall through
545505
br!(builder, blocks(new_f)[2])
@@ -558,8 +518,6 @@ function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.
558518
# NOTE: if we ever have legitimate uses of the old function, create a shim instead
559519
fn = LLVM.name(f)
560520
@assert isempty(uses(f))
561-
# XXX: there may still be metadata using this function. RAUW updates those,
562-
# but asserts on a debug build due to the updated function type.
563521
unsafe_delete!(mod, f)
564522
LLVM.name!(new_f, fn)
565523

@@ -654,43 +612,9 @@ function add_kernel_state!(@nospecialize(job::CompilerJob), mod::LLVM.Module,
654612
value_map[param] = new_param
655613
end
656614

657-
# use a value materializer for replacing uses of the function in constants
658-
function materializer(val)
659-
opcodes = (LLVM.API.LLVMPtrToInt, LLVM.API.LLVMAddrSpaceCast, LLVM.API.LLVMBitCast)
660-
if val isa LLVM.ConstantExpr && opcode(val) in opcodes
661-
src = operands(val)[1]
662-
if haskey(workmap, src)
663-
return if opcode(val) == LLVM.API.LLVMPtrToInt
664-
LLVM.const_ptrtoint(workmap[src], llvmtype(val))
665-
elseif opcode(val) == LLVM.API.LLVMAddrSpaceCast
666-
LLVM.const_addrspacecast(workmap[src], llvmtype(val))
667-
elseif opcode(val) == LLVM.API.LLVMBitCast
668-
LLVM.const_bitcast(workmap[src], llvmtype(val))
669-
end
670-
end
671-
end
672-
return val
673-
end
674-
675-
# before D96531 (part of LLVM 13), clone_into! wants to duplicate debug metadata
676-
# when the functions are part of the same module. that is invalid, because it
677-
# results in desynchronized debug intrinsics (GPUCompiler#284), so remove those.
678-
if LLVM.version() < v"13"
679-
removals = LLVM.Instruction[]
680-
for bb in blocks(f), inst in instructions(bb)
681-
if inst isa LLVM.CallInst && LLVM.name(called_value(inst)) == "llvm.dbg.declare"
682-
push!(removals, inst)
683-
end
684-
end
685-
for inst in removals
686-
@assert isempty(uses(inst))
687-
unsafe_delete!(LLVM.parent(inst), inst)
688-
end
689-
changes = LLVM.API.LLVMCloneFunctionChangeTypeGlobalChanges
690-
else
691-
changes = LLVM.API.LLVMCloneFunctionChangeTypeLocalChangesOnly
692-
end
693-
clone_into!(new_f, f; value_map, materializer, changes)
615+
value_map[f] = new_f
616+
clone_into!(new_f, f; value_map,
617+
changes=LLVM.API.LLVMCloneFunctionChangeTypeGlobalChanges)
694618

695619
# we can't remove this function yet, as we might still need to rewrite any called,
696620
# but remove the IR already

src/ptx.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,6 @@ function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
188188
# TODO: optimization passes to clean-up byval
189189

190190
# add metadata annotations for the assembler to the module
191-
# NOTE: we need to do this as late as possible, because otherwise the metadata (which
192-
# refers to a specific function) can get lost when cloning functions. normally
193-
# RAUW updates those references, but we can't RAUW with a changed function type.
194191

195192
# property annotations
196193
annotations = Metadata[entry]

src/spirv.jl

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -288,30 +288,14 @@ function wrap_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.F
288288
end
289289
end
290290

291-
# inline the old IR
291+
# map the arguments
292292
value_map = Dict{LLVM.Value, LLVM.Value}(
293293
param => new_args[i] for (i,param) in enumerate(parameters(f))
294294
)
295295

296-
# before D96531 (part of LLVM 13), clone_into! wants to duplicate debug metadata
297-
# when the functions are part of the same module. that is invalid, because it
298-
# results in desynchronized debug intrinsics (GPUCompiler#284), so remove those.
299-
if LLVM.version() < v"13"
300-
removals = LLVM.Instruction[]
301-
for bb in blocks(f), inst in instructions(bb)
302-
if inst isa LLVM.CallInst && LLVM.name(called_value(inst)) == "llvm.dbg.declare"
303-
push!(removals, inst)
304-
end
305-
end
306-
for inst in removals
307-
@assert isempty(uses(inst))
308-
unsafe_delete!(LLVM.parent(inst), inst)
309-
end
310-
changes = LLVM.API.LLVMCloneFunctionChangeTypeGlobalChanges
311-
else
312-
changes = LLVM.API.LLVMCloneFunctionChangeTypeLocalChangesOnly
313-
end
314-
clone_into!(new_f, f; value_map, changes)
296+
value_map[f] = new_f
297+
clone_into!(new_f, f; value_map,
298+
changes=LLVM.API.LLVMCloneFunctionChangeTypeGlobalChanges)
315299

316300
# apply byval attributes again (`clone_into!` didn't due to the type mismatch)
317301
for i in 1:length(byval)
@@ -333,8 +317,6 @@ function wrap_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.F
333317
# NOTE: if we ever have legitimate uses of the old function, create a shim instead
334318
fn = LLVM.name(f)
335319
@assert isempty(uses(f))
336-
# XXX: there may still be metadata using this function. RAUW updates those,
337-
# but asserts on a debug build due to the updated function type.
338320
unsafe_delete!(mod, f)
339321
LLVM.name!(new_f, fn)
340322

0 commit comments

Comments
 (0)