Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1453,13 +1453,22 @@ bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,

bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
if (Function *F = Info.CB->getCalledFunction())
if (Function *F = Info.CB->getCalledFunction()) {
if (F->isIntrinsic()) {
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
"Unexpected intrinsic");
return lowerChainCall(MIRBuilder, Info);
}

// Detect undefined behavior caused by a calling convention mismatch early, so
// that it is reported here instead of surfacing as a harder-to-debug error later.
if (F->getCallingConv() != Info.CallConv) {
LLVM_DEBUG(dbgs() << "Failed to lower call: calling convention mismatch "
"(undefined behavior)\n");
return false;
Comment on lines +1466 to +1468
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This defers handling to the DAG fallback; the case should be handled directly here instead.

}
}

if (Info.IsVarArg) {
LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
return false;
Expand Down
32 changes: 30 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3696,11 +3696,39 @@ enum ChainCallArgIdx {
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
CallingConv::ID CallConv = CLI.CallConv;
bool IsChainCallConv = AMDGPU::isChainCC(CallConv);

SelectionDAG &DAG = CLI.DAG;
// Detect undefined behavior caused by a calling convention mismatch early, so
// that it is reported here instead of surfacing as a harder-to-debug error later.
if (Function *CalledFn = CLI.CB->getCalledFunction()) {
if (CalledFn->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
// This intrinsic handles calls to functions with specific calling
// conventions. These functions might have two valid calling conventions
// at a single callsite, requiring special handling.
if (Value *FnArg = CLI.CB->getArgOperand(0)) {
if (Function *WrappedFn = dyn_cast<Function>(FnArg)) {
switch (WrappedFn->getCallingConv()) {
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
break;
default:
return lowerUnhandledCall(
CLI, InVals,
"calling convention mismatch (undefined behavior) ");
}
}
}
} else {
CallingConv::ID CalledFnCC = CalledFn->getCallingConv();
if (CalledFnCC != CallConv) {
return lowerUnhandledCall(
CLI, InVals, "calling convention mismatch (undefined behavior) ");
}
}
}

bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
const SDLoc &DL = CLI.DL;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0
declare hidden i32 @external_i32_func_i32(i32) #0

; amdgpu_gfx calling convention
declare i1 @external_gfx_i1_func_void() #0
declare i8 @external_gfx_i8_func_void() #0
declare i32 @external_gfx_i32_func_void() #0
declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
declare hidden i32 @external_gfx_i32_func_i32(i32) #0
declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0
declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0
declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0
declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0
declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0


define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ entry:
ret i32 %ret
}

declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))

define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
Expand Down Expand Up @@ -969,7 +969,7 @@ entry:
ret void
}

declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)

; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/calling-conventions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
%val = call float @coldcc(float 1.0)
%val = call coldcc float @coldcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
Expand Down Expand Up @@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
%val = call float @fastcc(float 1.0)
%val = call fastcc float @fastcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; RUN: not llc -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ISEL %s
; CHECK-NO-ISEL: error: {{.*}} in function main {{.*}} calling convention mismatch (undefined behavior) foo

; RUN: not --crash llc -debug -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-ISEL %s
; CHECK-ISEL: Failed to lower call: calling convention mismatch (undefined behavior)

; COM: This test checks that a call whose calling convention does not match the callee's is diagnosed.
; COM: Reporting the mismatch during call lowering exposes the issue earlier in the pipeline, which simplifies debugging.

; Callee for the mismatch test: defined with the amdgpu_ps calling convention.
; It ignores both arguments and always returns 0.
define amdgpu_ps i32 @foo(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
ret i32 0
}

; Caller that triggers the diagnostic: the call to @foo below carries no
; calling-convention keyword, whereas @foo is defined as amdgpu_ps.
define amdgpu_ps i32 @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
main_body:
; Intentional mismatch between this call site and the definition of @foo.
%C = call i32 @foo(ptr addrspace(4) null, ptr addrspace(4) %arg)
ret i32 %C
}
64 changes: 2 additions & 62 deletions llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
ret void
}

define hidden void @void_func_void_clobber_s33() #1 {
define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s33, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s33, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v0, s33, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s33, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s33, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s33, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
}

define hidden void @void_func_void_clobber_s34() #1 {
define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s34, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s34, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v0, s34, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s34, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s34, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s34, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/required-export-priority.ll
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
call void @test_export_gfx(float %x)
call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}

Expand All @@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
call void @llvm.amdgcn.s.setprio(i16 0)
call void @test_export_gfx(float %x)
call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sibling-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ entry:
ret i32 %ret
}

declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))

define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: sibling_call_fastcc_multi_byval:
Expand Down Expand Up @@ -908,7 +908,7 @@ entry:
ret void
}

declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)

; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
target triple = "amdgcn-amd-amdhsa"

; Unknown functions are conservatively passed all implicit parameters
declare void @unknown_call()
declare fastcc void @unknown_call()
; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation
define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 {
; CHECK-LABEL: kern:
Expand Down