Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2671,18 +2671,6 @@ def int_amdgcn_cs_chain:
],
[IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;

// Run a function with all the lanes enabled. Only direct calls are allowed. The
// first argument is the callee, which must have the `amdgpu_gfx_whole_wave`
// calling convention and must not be variadic. The remaining arguments to the
// callee are taken from the arguments passed to the intrinsic. Lanes that are
// inactive at the point of the call will receive poison. The return value is
// the return value of the callee for the active lanes (there is no return
// value in the inactive ones).
def int_amdgcn_call_whole_wave:
Intrinsic<[llvm_any_ty], // The return type of the callee.
[llvm_anyptr_ty, // The callee.
llvm_vararg_ty], // The arguments to the callee.
[IntrConvergent]>;

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2556,7 +2556,6 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
return true;
case Intrinsic::amdgcn_cs_chain:
case Intrinsic::amdgcn_call_whole_wave:
return translateCallBase(CI, MIRBuilder);
case Intrinsic::fptrunc_round: {
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
Expand Down
37 changes: 0 additions & 37 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7984,43 +7984,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
HasTailCall = true;
return;
}
// Lower a call to the amdgcn_call_whole_wave intrinsic as an ordinary call:
// operand 0 of the intrinsic becomes the callee, operands 1..N become the
// call arguments, and the call uses the AMDGPU_Gfx_WholeWave calling
// convention.
case Intrinsic::amdgcn_call_whole_wave: {
TargetLowering::ArgListTy Args;

// The first argument is the callee. Skip it when assembling the call args.
// NOTE(review): one ArgListEntry is reused for every operand; this assumes
// setAttributes() resets every per-argument flag on each iteration - confirm.
TargetLowering::ArgListEntry Arg;
for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
Arg.Node = getValue(I.getArgOperand(Idx));
Arg.Ty = I.getArgOperand(Idx)->getType();
Arg.setAttributes(&I, Idx);
Args.push_back(Arg);
}

// Forward the convergence control token when the call carries a
// convergencectrl operand bundle; otherwise the token stays a
// default-constructed SDValue.
SDValue ConvControlToken;
if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
ConvControlToken = getValue(Token);
}

// Describe the call: the return type is the intrinsic call's own type, the
// callee is the value of operand 0, and it is never emitted as a tail call.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
getValue(I.getArgOperand(0)), std::move(Args))
.setTailCall(false)
.setIsPreallocated(
I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
.setConvergent(I.isConvergent())
.setConvergenceControlToken(ConvControlToken);
CLI.CB = &I;

// No EH pad is passed for this call.
std::pair<SDValue, SDValue> Result =
lowerInvokable(CLI, /*EHPadBB=*/nullptr);

// Record a value for the intrinsic call only when lowering produced one.
if (Result.first.getNode())
setValue(&I, Result.first);
return;
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Mask = getValue(I.getOperand(1));
Expand Down
30 changes: 0 additions & 30 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6612,36 +6612,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"Value for inactive lanes must be a VGPR function argument", &Call);
break;
}
// Verify a call to the amdgcn_call_whole_wave intrinsic: operand 0 must name
// a Function directly, that function must use the amdgpu_gfx_whole_wave
// calling convention and be non-variadic, and the remaining operands must
// line up with the callee's parameters in count, type and inreg attribute.
case Intrinsic::amdgcn_call_whole_wave: {
// dyn_cast yields null when operand 0 is not a Function, which is reported
// as an indirect call.
// NOTE(review): F is dereferenced below without a further null test; this
// presumably relies on Check() leaving the function when its condition
// fails - confirm against the macro definition.
auto F = dyn_cast<Function>(Call.getArgOperand(0));
Check(F, "Indirect whole wave calls are not allowed", &Call);

CallingConv::ID CC = F->getCallingConv();
Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
"Callee must have the amdgpu_gfx_whole_wave calling convention",
&Call);

Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);

// The counts are compared directly: the callee operand of the intrinsic
// occupies the slot of the callee's leading i1 parameter.
Check(Call.arg_size() == F->arg_size(),
"Call argument count must match callee argument count", &Call);

// The first argument of the call is the callee, and the first argument of
// the callee is the active mask. The rest of the arguments must match.
Check(F->arg_begin()->getType()->isIntegerTy(1),
"Callee must have i1 as its first argument", &Call);
// drop_begin skips the first pair (callee operand vs. active mask); the
// remaining pairs are compared position by position.
for (auto [CallArg, FuncArg] :
drop_begin(zip_equal(Call.args(), F->args()))) {
Check(CallArg->getType() == FuncArg.getType(),
"Argument types must match", &Call);

// Check that inreg attributes match between call site and function.
// Positions correspond one-to-one, so the callee's argument number is also
// used as the call-site operand index.
Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
FuncArg.hasInRegAttr(),
"Argument inreg attributes must match", &Call);
}
break;
}
case Intrinsic::amdgcn_s_prefetch_data: {
Check(
AMDGPU::isFlatGlobalAddrSpace(
Expand Down
19 changes: 3 additions & 16 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1464,22 +1464,9 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
if (Function *F = Info.CB->getCalledFunction())
if (F->isIntrinsic()) {
switch (F->getIntrinsicID()) {
case Intrinsic::amdgcn_cs_chain:
return lowerChainCall(MIRBuilder, Info);
case Intrinsic::amdgcn_call_whole_wave:
Info.CallConv = CallingConv::AMDGPU_Gfx_WholeWave;

// Get the callee from the original instruction, so it doesn't look like
// this is an indirect call.
Info.Callee = MachineOperand::CreateGA(
cast<GlobalValue>(Info.CB->getOperand(0)), /*Offset=*/0);
Info.OrigArgs.erase(Info.OrigArgs.begin());
Info.IsVarArg = false;
break;
default:
llvm_unreachable("Unexpected intrinsic call");
}
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
"Unexpected intrinsic");
return lowerChainCall(MIRBuilder, Info);
}

if (Info.IsVarArg) {
Expand Down
174 changes: 0 additions & 174 deletions llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll

This file was deleted.

26 changes: 0 additions & 26 deletions llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,29 +101,3 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
%ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
ret i64 %ret
}

declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, i32 %x)

; Make sure we don't pass the first argument (i1).
define amdgpu_cs void @call(i32 %x, ptr %p) {
; CHECK-LABEL: name: call
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
; CHECK-NEXT: S_ENDPGM 0
%ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, i32 %x) convergent
store i32 %ret, ptr %p
ret void
}
Loading
Loading