diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index a15f193549936..e2e386d204d55 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -1453,13 +1453,22 @@ bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder, bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { - if (Function *F = Info.CB->getCalledFunction()) + if (Function *F = Info.CB->getCalledFunction()) { if (F->isIntrinsic()) { assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain && "Unexpected intrinsic"); return lowerChainCall(MIRBuilder, Info); } + // Detect UB caused due to calling convention mismatches early to avoid + // debugging if errors occur later. + if (F->getCallingConv() != Info.CallConv) { + LLVM_DEBUG(dbgs() << "Failed to lower call: calling convention mismatch " + "(undefined behavior)\n"); + return false; + } + } + if (Info.IsVarArg) { LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n"); return false; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c05ba42d999e9..481fd7c4e9dac 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3696,11 +3696,39 @@ enum ChainCallArgIdx { // The wave scratch offset register is used as the global base pointer. SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; CallingConv::ID CallConv = CLI.CallConv; - bool IsChainCallConv = AMDGPU::isChainCC(CallConv); - SelectionDAG &DAG = CLI.DAG; + // Detect UB caused due to calling convention mismatches early to avoid + // debugging if errors occur later. + if (Function *CalledFn = CLI.CB->getCalledFunction()) { + if (CalledFn->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) { + // This intrinsic handles calls to functions with specific calling + // conventions. These functions might have two valid calling conventions + // at a single callsite, requiring special handling. + if (Value *FnArg = CLI.CB->getArgOperand(0)) { + if (Function *WrappedFn = dyn_cast(FnArg)) { + switch (WrappedFn->getCallingConv()) { + case CallingConv::AMDGPU_CS_Chain: + case CallingConv::AMDGPU_CS_ChainPreserve: + break; + default: + return lowerUnhandledCall( + CLI, InVals, + "calling convention mismatch (undefined behavior) "); + } + } + } + } else { + CallingConv::ID CalledFnCC = CalledFn->getCallingConv(); + if (CalledFnCC != CallConv) { + return lowerUnhandledCall( + CLI, InVals, "calling convention mismatch (undefined behavior) "); + } + } + } + bool IsChainCallConv = AMDGPU::isChainCC(CallConv); const SDLoc &DL = CLI.DL; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 0b6fe90b90654..96ee15f2eb78b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0 declare hidden i32 @external_i32_func_i32(i32) #0 ; amdgpu_gfx calling convention -declare i1 @external_gfx_i1_func_void() #0 -declare i8 @external_gfx_i8_func_void() #0 -declare i32 @external_gfx_i32_func_void() #0 -declare { i32, i64 } @external_gfx_i32_i64_func_void() #0 -declare hidden i32 @external_gfx_i32_func_i32(i32) #0 +declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0 +declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0 +declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0 +declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0 +declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index c18c96d9c50c7..ca580d8f29c84 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -802,7 +802,7 @@ entry: ret i32 %ret } -declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64])) +declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64])) define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval @@ -969,7 +969,7 @@ entry: ret void } -declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32) +declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32) ; Callee has a byval and non-byval stack passed argument define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll index 0c335e45c9e2f..bc8e21e03251d 100644 --- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll @@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX11-NEXT: global_store_b32 v[0:1], v0, off ; GFX11-NEXT: s_endpgm - %val = call float @coldcc(float 1.0) + %val = call coldcc float @coldcc(float 1.0) store float %val, ptr addrspace(1) poison ret void } @@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX11-NEXT: global_store_b32 v[0:1], v0, off ; GFX11-NEXT: s_endpgm - %val = call float @fastcc(float 1.0) + %val = call fastcc float @fastcc(float 1.0) store float %val, ptr addrspace(1) poison ret void } diff --git a/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll b/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll new file mode 100644 index 0000000000000..a8442d9090857 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll @@ -0,0 +1,18 @@ +; RUN: not llc -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ISEL %s +; CHECK-NO-ISEL: error: {{.*}} in function main {{.*}} calling convention mismatch (undefined behavior) foo + +; RUN: not --crash llc -debug -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-ISEL %s +; CHECK-ISEL: Failed to lower call: calling convention mismatch (undefined behavior) + +; COM: This test aims to identify invalid method calls. +; COM: By doing so, it simplifies debugging by exposing issues earlier in the pipeline. + +define amdgpu_ps i32 @foo(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) { + ret i32 0 +} + +define amdgpu_ps i32 @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) { +main_body: + %C = call i32 @foo(ptr addrspace(4) null, ptr addrspace(4) %arg) + ret i32 %C +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 4afc2fc972a28..13fff0215804d 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ret void } -define hidden void @void_func_void_clobber_s33() #1 { +define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 { ; GFX9-LABEL: void_func_void_clobber_s33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v0, s33, 0 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s33, v0, 0 -; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: void_func_void_clobber_s33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v0, s33, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s33, v0, 0 -; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_void_clobber_s33: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v0, s33, 0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; clobber ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s33, v0, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber", "~{s33}"() #0 ret void } -define hidden void @void_func_void_clobber_s34() #1 { +define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 { ; GFX9-LABEL: void_func_void_clobber_s34: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v0, s34, 0 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s34, v0, 0 -; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: void_func_void_clobber_s34: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v0, s34, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s34, v0, 0 -; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_void_clobber_s34: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v0, s34, 0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; clobber ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s34, v0, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber", "~{s34}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll index f14cd4488ef1e..4aa1ddee2efe3 100644 --- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll @@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 { ; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GCN-NEXT: s_endpgm %x = fadd float %v, 1.0 - call void @test_export_gfx(float %x) + call amdgpu_gfx void @test_export_gfx(float %x) ret void } @@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 { ; GCN-NEXT: s_endpgm %x = fadd float %v, 1.0 call void @llvm.amdgcn.s.setprio(i16 0) - call void @test_export_gfx(float %x) + call amdgpu_gfx void @test_export_gfx(float %x) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 0221bb0cf4f35..27e24828a8630 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -873,7 +873,7 @@ entry: ret i32 %ret } -declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64])) +declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64])) define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: sibling_call_fastcc_multi_byval: @@ -908,7 +908,7 @@ entry: ret void } -declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32) +declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32) ; Callee has a byval and non-byval stack passed argument define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll index c779f1d548ea0..16e0a34376b29 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll @@ -4,7 +4,7 @@ target triple = "amdgcn-amd-amdhsa" ; Unknown functions are conservatively passed all implicit parameters -declare void @unknown_call() +declare fastcc void @unknown_call() ; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 { ; CHECK-LABEL: kern: