-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Enhance error handling for mismatched calling conventions #137825
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Introduce early failure when mismatched calling conventions are detected; preventing errors from propagating further.
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel Author: Manuel Carrasco (mgcarrasco) ChangesIntroduce early failure when mismatched calling conventions are detected to prevent errors from propagating further in the AMDGPU backend. Full diff: https://github.com/llvm/llvm-project/pull/137825.diff 10 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index a15f193549936..e2e386d204d55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1453,13 +1453,22 @@ bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
- if (Function *F = Info.CB->getCalledFunction())
+ if (Function *F = Info.CB->getCalledFunction()) {
if (F->isIntrinsic()) {
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
"Unexpected intrinsic");
return lowerChainCall(MIRBuilder, Info);
}
+ // Detect UB caused due to calling convention mismatches early to avoid
+ // debugging if errors occur later.
+ if (F->getCallingConv() != Info.CallConv) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call: calling convention mismatch "
+ "(undefined behavior)\n");
+ return false;
+ }
+ }
+
if (Info.IsVarArg) {
LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c05ba42d999e9..481fd7c4e9dac 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3696,11 +3696,39 @@ enum ChainCallArgIdx {
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
CallingConv::ID CallConv = CLI.CallConv;
- bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
- SelectionDAG &DAG = CLI.DAG;
+ // Detect UB caused due to calling convention mismatches early to avoid
+ // debugging if errors occur later.
+ if (Function *CalledFn = CLI.CB->getCalledFunction()) {
+ if (CalledFn->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
+ // This intrinsic handles calls to functions with specific calling
+ // conventions. These functions might have two valid calling conventions
+ // at a single callsite, requiring special handling.
+ if (Value *FnArg = CLI.CB->getArgOperand(0)) {
+ if (Function *WrappedFn = dyn_cast<Function>(FnArg)) {
+ switch (WrappedFn->getCallingConv()) {
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ break;
+ default:
+ return lowerUnhandledCall(
+ CLI, InVals,
+ "calling convention mismatch (undefined behavior) ");
+ }
+ }
+ }
+ } else {
+ CallingConv::ID CalledFnCC = CalledFn->getCallingConv();
+ if (CalledFnCC != CallConv) {
+ return lowerUnhandledCall(
+ CLI, InVals, "calling convention mismatch (undefined behavior) ");
+ }
+ }
+ }
+ bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
const SDLoc &DL = CLI.DL;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 0b6fe90b90654..96ee15f2eb78b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0
declare hidden i32 @external_i32_func_i32(i32) #0
; amdgpu_gfx calling convention
-declare i1 @external_gfx_i1_func_void() #0
-declare i8 @external_gfx_i8_func_void() #0
-declare i32 @external_gfx_i32_func_void() #0
-declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
-declare hidden i32 @external_gfx_i32_func_i32(i32) #0
+declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0
+declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0
+declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index c18c96d9c50c7..ca580d8f29c84 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -802,7 +802,7 @@ entry:
ret i32 %ret
}
-declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
+declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
@@ -969,7 +969,7 @@ entry:
ret void
}
-declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
+declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 0c335e45c9e2f..bc8e21e03251d 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
- %val = call float @coldcc(float 1.0)
+ %val = call coldcc float @coldcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
@@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
- %val = call float @fastcc(float 1.0)
+ %val = call fastcc float @fastcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll b/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll
new file mode 100644
index 0000000000000..a8442d9090857
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ISEL %s
+; CHECK-NO-ISEL: error: {{.*}} in function main {{.*}} calling convention mismatch (undefined behavior) foo
+
+; RUN: not --crash llc -debug -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-ISEL %s
+; CHECK-ISEL: Failed to lower call: calling convention mismatch (undefined behavior)
+
+; COM: This test aims to identify invalid method calls.
+; COM: By doing so, it simplifies debugging by exposing issues earlier in the pipeline.
+
+define amdgpu_ps i32 @foo(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
+ ret i32 0
+}
+
+define amdgpu_ps i32 @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
+main_body:
+ %C = call i32 @foo(ptr addrspace(4) null, ptr addrspace(4) %arg)
+ ret i32 %C
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
index 4afc2fc972a28..13fff0215804d 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
@@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
ret void
}
-define hidden void @void_func_void_clobber_s33() #1 {
+define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v0, s33, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: v_readlane_b32 s33, v0, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v0, s33, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
-; GFX10-NEXT: v_readlane_b32 s33, v0, 0
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v0, s33, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s33, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
}
-define hidden void @void_func_void_clobber_s34() #1 {
+define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v0, s34, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: v_readlane_b32 s34, v0, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v0, s34, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
-; GFX10-NEXT: v_readlane_b32 s34, v0, 0
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v0, s34, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s34, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index f14cd4488ef1e..4aa1ddee2efe3 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
- call void @test_export_gfx(float %x)
+ call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}
@@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
call void @llvm.amdgcn.s.setprio(i16 0)
- call void @test_export_gfx(float %x)
+ call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 0221bb0cf4f35..27e24828a8630 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -873,7 +873,7 @@ entry:
ret i32 %ret
}
-declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
+declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: sibling_call_fastcc_multi_byval:
@@ -908,7 +908,7 @@ entry:
ret void
}
-declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
+declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
index c779f1d548ea0..16e0a34376b29 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
@@ -4,7 +4,7 @@
target triple = "amdgcn-amd-amdhsa"
; Unknown functions are conservatively passed all implicit parameters
-declare void @unknown_call()
+declare fastcc void @unknown_call()
; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation
define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 {
; CHECK-LABEL: kern:
|
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Undefined behavior is not a compiler error. This should handle the call directly, probably by just emitting no code
| LLVM_DEBUG(dbgs() << "Failed to lower call: calling convention mismatch " | ||
| "(undefined behavior)\n"); | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is deferring handling to the DAG fallback, should directly handle the case
Thanks for the clarification on compiler error and UB. I will close the PR for now and submit the UB fixes for existing test cases separately. |
Introduce early failure when mismatched calling conventions are detected to prevent errors from propagating further in the AMDGPU backend.