@@ -492,54 +492,14 @@ function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.
492
492
end
493
493
end
494
494
495
- # inline the old IR
495
+ # map the arguments
496
496
value_map = Dict {LLVM.Value, LLVM.Value} (
497
497
param => new_args[i] for (i,param) in enumerate (parameters (f))
498
498
)
499
499
500
- # before D96531 (part of LLVM 13), clone_into! wants to duplicate debug metadata
501
- # when the functions are part of the same module. that is invalid, because it
502
- # results in desynchronized debug intrinsics (GPUCompiler#284), so remove those.
503
- if LLVM. version () < v " 13"
504
- removals = LLVM. Instruction[]
505
- for bb in blocks (f), inst in instructions (bb)
506
- if inst isa LLVM. CallInst && LLVM. name (called_value (inst)) == " llvm.dbg.declare"
507
- push! (removals, inst)
508
- end
509
- end
510
- for inst in removals
511
- @assert isempty (uses (inst))
512
- unsafe_delete! (LLVM. parent (inst), inst)
513
- end
514
- changes = LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges
515
- else
516
- changes = LLVM. API. LLVMCloneFunctionChangeTypeLocalChangesOnly
517
- end
518
-
519
- # use a value materializer for replacing uses of the function in constants
520
- # NOTE: we assume kernel functions can't be called. on-device kernel launches,
521
- # e.g. CUDA's dynamic parallelism, will pass the function to an API instead,
522
- # and we update those constant expressions arguments here.
523
- function materializer (val)
524
- opcodes = (LLVM. API. LLVMPtrToInt, LLVM. API. LLVMAddrSpaceCast, LLVM. API. LLVMBitCast)
525
- if val isa LLVM. ConstantExpr && opcode (val) in opcodes
526
- target = operands (val)[1 ]
527
- if target == f
528
- return if opcode (val) == LLVM. API. LLVMPtrToInt
529
- LLVM. const_ptrtoint (new_f, llvmtype (val))
530
- elseif opcode (val) == LLVM. API. LLVMAddrSpaceCast
531
- LLVM. const_addrspacecast (new_f, llvmtype (val))
532
- elseif opcode (val) == LLVM. API. LLVMBitCast
533
- LLVM. const_bitcast (new_f, llvmtype (val))
534
- end
535
- end
536
- end
537
- return val
538
- end
539
-
540
- # we don't want module-level changes, because otherwise LLVM will clone metadata,
541
- # resulting in mismatching references between `!dbg` metadata and `dbg` instructions
542
- clone_into! (new_f, f; value_map, changes, materializer)
500
+ value_map[f] = new_f
501
+ clone_into! (new_f, f; value_map,
502
+ changes= LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges)
543
503
544
504
# fall through
545
505
br! (builder, blocks (new_f)[2 ])
@@ -558,8 +518,6 @@ function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.
558
518
# NOTE: if we ever have legitimate uses of the old function, create a shim instead
559
519
fn = LLVM. name (f)
560
520
@assert isempty (uses (f))
561
- # XXX : there may still be metadata using this function. RAUW updates those,
562
- # but asserts on a debug build due to the updated function type.
563
521
unsafe_delete! (mod, f)
564
522
LLVM. name! (new_f, fn)
565
523
@@ -654,43 +612,9 @@ function add_kernel_state!(@nospecialize(job::CompilerJob), mod::LLVM.Module,
654
612
value_map[param] = new_param
655
613
end
656
614
657
- # use a value materializer for replacing uses of the function in constants
658
- function materializer (val)
659
- opcodes = (LLVM. API. LLVMPtrToInt, LLVM. API. LLVMAddrSpaceCast, LLVM. API. LLVMBitCast)
660
- if val isa LLVM. ConstantExpr && opcode (val) in opcodes
661
- src = operands (val)[1 ]
662
- if haskey (workmap, src)
663
- return if opcode (val) == LLVM. API. LLVMPtrToInt
664
- LLVM. const_ptrtoint (workmap[src], llvmtype (val))
665
- elseif opcode (val) == LLVM. API. LLVMAddrSpaceCast
666
- LLVM. const_addrspacecast (workmap[src], llvmtype (val))
667
- elseif opcode (val) == LLVM. API. LLVMBitCast
668
- LLVM. const_bitcast (workmap[src], llvmtype (val))
669
- end
670
- end
671
- end
672
- return val
673
- end
674
-
675
- # before D96531 (part of LLVM 13), clone_into! wants to duplicate debug metadata
676
- # when the functions are part of the same module. that is invalid, because it
677
- # results in desynchronized debug intrinsics (GPUCompiler#284), so remove those.
678
- if LLVM. version () < v " 13"
679
- removals = LLVM. Instruction[]
680
- for bb in blocks (f), inst in instructions (bb)
681
- if inst isa LLVM. CallInst && LLVM. name (called_value (inst)) == " llvm.dbg.declare"
682
- push! (removals, inst)
683
- end
684
- end
685
- for inst in removals
686
- @assert isempty (uses (inst))
687
- unsafe_delete! (LLVM. parent (inst), inst)
688
- end
689
- changes = LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges
690
- else
691
- changes = LLVM. API. LLVMCloneFunctionChangeTypeLocalChangesOnly
692
- end
693
- clone_into! (new_f, f; value_map, materializer, changes)
615
+ value_map[f] = new_f
616
+ clone_into! (new_f, f; value_map,
617
+ changes= LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges)
694
618
695
619
# we can't remove this function yet, as we might still need to rewrite any called,
696
620
# but remove the IR already
0 commit comments