Skip to content

Commit 5764ca3

Browse files
committed
[AMDGPU] Enhance error handling for mismatched calling conventions
Introduce early failure when mismatched calling conventions are detected, preventing errors from propagating further.
1 parent c5c4f0d commit 5764ca3

File tree

10 files changed

+74
-79
lines changed

10 files changed

+74
-79
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1453,13 +1453,22 @@ bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
14531453

14541454
bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
14551455
CallLoweringInfo &Info) const {
1456-
if (Function *F = Info.CB->getCalledFunction())
1456+
if (Function *F = Info.CB->getCalledFunction()) {
14571457
if (F->isIntrinsic()) {
14581458
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
14591459
"Unexpected intrinsic");
14601460
return lowerChainCall(MIRBuilder, Info);
14611461
}
14621462

1463+
// Detect UB caused by calling convention mismatches early to avoid
1464+
// debugging if errors occur later.
1465+
if (F->getCallingConv() != Info.CallConv) {
1466+
LLVM_DEBUG(dbgs() << "Failed to lower call: calling convention mismatch "
1467+
"(undefined behavior)\n");
1468+
return false;
1469+
}
1470+
}
1471+
14631472
if (Info.IsVarArg) {
14641473
LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
14651474
return false;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3696,11 +3696,39 @@ enum ChainCallArgIdx {
36963696
// The wave scratch offset register is used as the global base pointer.
36973697
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
36983698
SmallVectorImpl<SDValue> &InVals) const {
3699+
SelectionDAG &DAG = CLI.DAG;
36993700
CallingConv::ID CallConv = CLI.CallConv;
3700-
bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
37013701

3702-
SelectionDAG &DAG = CLI.DAG;
3702+
// Detect UB caused by calling convention mismatches early to avoid
3703+
// debugging if errors occur later.
3704+
if (Function *CalledFn = CLI.CB->getCalledFunction()) {
3705+
if (CalledFn->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
3706+
// This intrinsic handles calls to functions with specific calling
3707+
// conventions. These functions might have two valid calling conventions
3708+
// at a single callsite, requiring special handling.
3709+
if (Value *FnArg = CLI.CB->getArgOperand(0)) {
3710+
if (Function *WrappedFn = dyn_cast<Function>(FnArg)) {
3711+
switch (WrappedFn->getCallingConv()) {
3712+
case CallingConv::AMDGPU_CS_Chain:
3713+
case CallingConv::AMDGPU_CS_ChainPreserve:
3714+
break;
3715+
default:
3716+
return lowerUnhandledCall(
3717+
CLI, InVals,
3718+
"calling convention mismatch (undefined behavior) ");
3719+
}
3720+
}
3721+
}
3722+
} else {
3723+
CallingConv::ID CalledFnCC = CalledFn->getCallingConv();
3724+
if (CalledFnCC != CallConv) {
3725+
return lowerUnhandledCall(
3726+
CLI, InVals, "calling convention mismatch (undefined behavior) ");
3727+
}
3728+
}
3729+
}
37033730

3731+
bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
37043732
const SDLoc &DL = CLI.DL;
37053733
SDValue Chain = CLI.Chain;
37063734
SDValue Callee = CLI.Callee;

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0
6060
declare hidden i32 @external_i32_func_i32(i32) #0
6161

6262
; amdgpu_gfx calling convention
63-
declare i1 @external_gfx_i1_func_void() #0
64-
declare i8 @external_gfx_i8_func_void() #0
65-
declare i32 @external_gfx_i32_func_void() #0
66-
declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
67-
declare hidden i32 @external_gfx_i32_func_i32(i32) #0
63+
declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0
64+
declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0
65+
declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0
66+
declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0
67+
declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0
6868

6969

7070
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -802,7 +802,7 @@ entry:
802802
ret i32 %ret
803803
}
804804

805-
declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
805+
declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
806806

807807
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
808808
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
@@ -969,7 +969,7 @@ entry:
969969
ret void
970970
}
971971

972-
declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
972+
declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
973973

974974
; Callee has a byval and non-byval stack passed argument
975975
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {

llvm/test/CodeGen/AMDGPU/calling-conventions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
209209
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
210210
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
211211
; GFX11-NEXT: s_endpgm
212-
%val = call float @coldcc(float 1.0)
212+
%val = call coldcc float @coldcc(float 1.0)
213213
store float %val, ptr addrspace(1) poison
214214
ret void
215215
}
@@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
303303
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
304304
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
305305
; GFX11-NEXT: s_endpgm
306-
%val = call float @fastcc(float 1.0)
306+
%val = call fastcc float @fastcc(float 1.0)
307307
store float %val, ptr addrspace(1) poison
308308
ret void
309309
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; RUN: not llc -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ISEL %s
2+
; CHECK-NO-ISEL: error: {{.*}} in function main {{.*}} calling convention mismatch (undefined behavior) foo
3+
4+
; RUN: not --crash llc -debug -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-ISEL %s
5+
; CHECK-ISEL: Failed to lower call: calling convention mismatch (undefined behavior)
6+
7+
; COM: This test checks that a call whose calling convention does not match the callee's is rejected.
8+
; COM: By doing so, it simplifies debugging by exposing issues earlier in the pipeline.
9+
10+
define amdgpu_ps i32 @foo(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
11+
ret i32 0
12+
}
13+
14+
define amdgpu_ps i32 @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
15+
main_body:
16+
%C = call i32 @foo(ptr addrspace(4) null, ptr addrspace(4) %arg)
17+
ret i32 %C
18+
}

llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll

Lines changed: 2 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
777777
ret void
778778
}
779779

780-
define hidden void @void_func_void_clobber_s33() #1 {
780+
define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 {
781781
; GFX9-LABEL: void_func_void_clobber_s33:
782782
; GFX9: ; %bb.0:
783783
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784-
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
785-
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
786-
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
787-
; GFX9-NEXT: v_writelane_b32 v0, s33, 0
788784
; GFX9-NEXT: ;;#ASMSTART
789785
; GFX9-NEXT: ; clobber
790786
; GFX9-NEXT: ;;#ASMEND
791-
; GFX9-NEXT: v_readlane_b32 s33, v0, 0
792-
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
793-
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
794-
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
795-
; GFX9-NEXT: s_waitcnt vmcnt(0)
796787
; GFX9-NEXT: s_setpc_b64 s[30:31]
797788
;
798789
; GFX10-LABEL: void_func_void_clobber_s33:
799790
; GFX10: ; %bb.0:
800791
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801-
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
802-
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
803-
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
804-
; GFX10-NEXT: s_mov_b32 exec_lo, s4
805-
; GFX10-NEXT: v_writelane_b32 v0, s33, 0
806792
; GFX10-NEXT: ;;#ASMSTART
807793
; GFX10-NEXT: ; clobber
808794
; GFX10-NEXT: ;;#ASMEND
809-
; GFX10-NEXT: v_readlane_b32 s33, v0, 0
810-
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
811-
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
812-
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
813-
; GFX10-NEXT: s_mov_b32 exec_lo, s4
814-
; GFX10-NEXT: s_waitcnt vmcnt(0)
815795
; GFX10-NEXT: s_setpc_b64 s[30:31]
816796
;
817797
; GFX11-LABEL: void_func_void_clobber_s33:
818798
; GFX11: ; %bb.0:
819799
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820-
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
821-
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
822-
; GFX11-NEXT: s_mov_b32 exec_lo, s0
823-
; GFX11-NEXT: v_writelane_b32 v0, s33, 0
824800
; GFX11-NEXT: ;;#ASMSTART
825801
; GFX11-NEXT: ; clobber
826802
; GFX11-NEXT: ;;#ASMEND
827-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
828-
; GFX11-NEXT: v_readlane_b32 s33, v0, 0
829-
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
830-
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
831-
; GFX11-NEXT: s_mov_b32 exec_lo, s0
832-
; GFX11-NEXT: s_waitcnt vmcnt(0)
833803
; GFX11-NEXT: s_setpc_b64 s[30:31]
834804
call void asm sideeffect "; clobber", "~{s33}"() #0
835805
ret void
836806
}
837807

838-
define hidden void @void_func_void_clobber_s34() #1 {
808+
define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 {
839809
; GFX9-LABEL: void_func_void_clobber_s34:
840810
; GFX9: ; %bb.0:
841811
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842-
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
843-
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
844-
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
845-
; GFX9-NEXT: v_writelane_b32 v0, s34, 0
846812
; GFX9-NEXT: ;;#ASMSTART
847813
; GFX9-NEXT: ; clobber
848814
; GFX9-NEXT: ;;#ASMEND
849-
; GFX9-NEXT: v_readlane_b32 s34, v0, 0
850-
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
851-
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
852-
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
853-
; GFX9-NEXT: s_waitcnt vmcnt(0)
854815
; GFX9-NEXT: s_setpc_b64 s[30:31]
855816
;
856817
; GFX10-LABEL: void_func_void_clobber_s34:
857818
; GFX10: ; %bb.0:
858819
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859-
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
860-
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
861-
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
862-
; GFX10-NEXT: s_mov_b32 exec_lo, s4
863-
; GFX10-NEXT: v_writelane_b32 v0, s34, 0
864820
; GFX10-NEXT: ;;#ASMSTART
865821
; GFX10-NEXT: ; clobber
866822
; GFX10-NEXT: ;;#ASMEND
867-
; GFX10-NEXT: v_readlane_b32 s34, v0, 0
868-
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
869-
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
870-
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
871-
; GFX10-NEXT: s_mov_b32 exec_lo, s4
872-
; GFX10-NEXT: s_waitcnt vmcnt(0)
873823
; GFX10-NEXT: s_setpc_b64 s[30:31]
874824
;
875825
; GFX11-LABEL: void_func_void_clobber_s34:
876826
; GFX11: ; %bb.0:
877827
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878-
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
879-
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
880-
; GFX11-NEXT: s_mov_b32 exec_lo, s0
881-
; GFX11-NEXT: v_writelane_b32 v0, s34, 0
882828
; GFX11-NEXT: ;;#ASMSTART
883829
; GFX11-NEXT: ; clobber
884830
; GFX11-NEXT: ;;#ASMEND
885-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
886-
; GFX11-NEXT: v_readlane_b32 s34, v0, 0
887-
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
888-
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
889-
; GFX11-NEXT: s_mov_b32 exec_lo, s0
890-
; GFX11-NEXT: s_waitcnt vmcnt(0)
891831
; GFX11-NEXT: s_setpc_b64 s[30:31]
892832
call void asm sideeffect "; clobber", "~{s34}"() #0
893833
ret void

llvm/test/CodeGen/AMDGPU/required-export-priority.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 {
310310
; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
311311
; GCN-NEXT: s_endpgm
312312
%x = fadd float %v, 1.0
313-
call void @test_export_gfx(float %x)
313+
call amdgpu_gfx void @test_export_gfx(float %x)
314314
ret void
315315
}
316316

@@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
330330
; GCN-NEXT: s_endpgm
331331
%x = fadd float %v, 1.0
332332
call void @llvm.amdgcn.s.setprio(i16 0)
333-
call void @test_export_gfx(float %x)
333+
call amdgpu_gfx void @test_export_gfx(float %x)
334334
ret void
335335
}
336336

llvm/test/CodeGen/AMDGPU/sibling-call.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ entry:
873873
ret i32 %ret
874874
}
875875

876-
declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
876+
declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
877877

878878
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
879879
; GCN-LABEL: sibling_call_fastcc_multi_byval:
@@ -908,7 +908,7 @@ entry:
908908
ret void
909909
}
910910

911-
declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
911+
declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
912912

913913
; Callee has a byval and non-byval stack passed argument
914914
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {

llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
target triple = "amdgcn-amd-amdhsa"
55

66
; Unknown functions are conservatively passed all implicit parameters
7-
declare void @unknown_call()
7+
declare fastcc void @unknown_call()
88
; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation
99
define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 {
1010
; CHECK-LABEL: kern:

0 commit comments

Comments
 (0)