Skip to content

Commit c6bfe49

Browse files
rovka authored and krishna2803 committed
Revert "[AMDGPU] Intrinsic for launching whole wave functions" (llvm#152286)
Reverts llvm#145859 because it broke a HIP test:

```
[34/59] Building CXX object External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o
FAILED: External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o
/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG -O3 -DNDEBUG -w -Werror=date-time --rocm-path=/opt/botworker/llvm/External/hip/rocm-6.3.0 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -xhip -mfma -MD -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o -MF External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -o External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o -c /home/botworker/bbot/clang-hip-vega20/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
fatal error: error in backend: Cannot select: intrinsic %llvm.amdgcn.readfirstlane
```
1 parent 40333de commit c6bfe49

File tree

10 files changed, with 3 additions and 1842 deletions.

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2671,18 +2671,6 @@ def int_amdgcn_cs_chain:
26712671
],
26722672
[IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;
26732673

2674-
// Run a function with all the lanes enabled. Only direct calls are allowed. The
2675-
// first argument is the callee, which must have the `amdgpu_gfx_whole_wave`
2676-
// calling convention and must not be variadic. The remaining arguments to the
2677-
// callee are taken from the arguments passed to the intrinsic. Lanes that are
2678-
// inactive at the point of the call will receive poison. The return value is
2679-
// the return value of the callee for the active lanes (there is no return
2680-
// value in the inactive ones).
2681-
def int_amdgcn_call_whole_wave:
2682-
Intrinsic<[llvm_any_ty], // The return type of the callee.
2683-
[llvm_anyptr_ty, // The callee.
2684-
llvm_vararg_ty], // The arguments to the callee.
2685-
[IntrConvergent]>;
26862674

26872675
//===----------------------------------------------------------------------===//
26882676
// CI+ Intrinsics

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2556,7 +2556,6 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
25562556
getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
25572557
return true;
25582558
case Intrinsic::amdgcn_cs_chain:
2559-
case Intrinsic::amdgcn_call_whole_wave:
25602559
return translateCallBase(CI, MIRBuilder);
25612560
case Intrinsic::fptrunc_round: {
25622561
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7984,43 +7984,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
79847984
HasTailCall = true;
79857985
return;
79867986
}
7987-
case Intrinsic::amdgcn_call_whole_wave: {
7988-
TargetLowering::ArgListTy Args;
7989-
7990-
// The first argument is the callee. Skip it when assembling the call args.
7991-
TargetLowering::ArgListEntry Arg;
7992-
for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
7993-
Arg.Node = getValue(I.getArgOperand(Idx));
7994-
Arg.Ty = I.getArgOperand(Idx)->getType();
7995-
Arg.setAttributes(&I, Idx);
7996-
Args.push_back(Arg);
7997-
}
7998-
7999-
SDValue ConvControlToken;
8000-
if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
8001-
auto *Token = Bundle->Inputs[0].get();
8002-
ConvControlToken = getValue(Token);
8003-
}
8004-
8005-
TargetLowering::CallLoweringInfo CLI(DAG);
8006-
CLI.setDebugLoc(getCurSDLoc())
8007-
.setChain(getRoot())
8008-
.setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
8009-
getValue(I.getArgOperand(0)), std::move(Args))
8010-
.setTailCall(false)
8011-
.setIsPreallocated(
8012-
I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
8013-
.setConvergent(I.isConvergent())
8014-
.setConvergenceControlToken(ConvControlToken);
8015-
CLI.CB = &I;
8016-
8017-
std::pair<SDValue, SDValue> Result =
8018-
lowerInvokable(CLI, /*EHPadBB=*/nullptr);
8019-
8020-
if (Result.first.getNode())
8021-
setValue(&I, Result.first);
8022-
return;
8023-
}
80247987
case Intrinsic::ptrmask: {
80257988
SDValue Ptr = getValue(I.getOperand(0));
80267989
SDValue Mask = getValue(I.getOperand(1));

llvm/lib/IR/Verifier.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6612,36 +6612,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
66126612
"Value for inactive lanes must be a VGPR function argument", &Call);
66136613
break;
66146614
}
6615-
case Intrinsic::amdgcn_call_whole_wave: {
6616-
auto F = dyn_cast<Function>(Call.getArgOperand(0));
6617-
Check(F, "Indirect whole wave calls are not allowed", &Call);
6618-
6619-
CallingConv::ID CC = F->getCallingConv();
6620-
Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
6621-
"Callee must have the amdgpu_gfx_whole_wave calling convention",
6622-
&Call);
6623-
6624-
Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
6625-
6626-
Check(Call.arg_size() == F->arg_size(),
6627-
"Call argument count must match callee argument count", &Call);
6628-
6629-
// The first argument of the call is the callee, and the first argument of
6630-
// the callee is the active mask. The rest of the arguments must match.
6631-
Check(F->arg_begin()->getType()->isIntegerTy(1),
6632-
"Callee must have i1 as its first argument", &Call);
6633-
for (auto [CallArg, FuncArg] :
6634-
drop_begin(zip_equal(Call.args(), F->args()))) {
6635-
Check(CallArg->getType() == FuncArg.getType(),
6636-
"Argument types must match", &Call);
6637-
6638-
// Check that inreg attributes match between call site and function
6639-
Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
6640-
FuncArg.hasInRegAttr(),
6641-
"Argument inreg attributes must match", &Call);
6642-
}
6643-
break;
6644-
}
66456615
case Intrinsic::amdgcn_s_prefetch_data: {
66466616
Check(
66476617
AMDGPU::isFlatGlobalAddrSpace(

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,22 +1464,9 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
14641464
CallLoweringInfo &Info) const {
14651465
if (Function *F = Info.CB->getCalledFunction())
14661466
if (F->isIntrinsic()) {
1467-
switch (F->getIntrinsicID()) {
1468-
case Intrinsic::amdgcn_cs_chain:
1469-
return lowerChainCall(MIRBuilder, Info);
1470-
case Intrinsic::amdgcn_call_whole_wave:
1471-
Info.CallConv = CallingConv::AMDGPU_Gfx_WholeWave;
1472-
1473-
// Get the callee from the original instruction, so it doesn't look like
1474-
// this is an indirect call.
1475-
Info.Callee = MachineOperand::CreateGA(
1476-
cast<GlobalValue>(Info.CB->getOperand(0)), /*Offset=*/0);
1477-
Info.OrigArgs.erase(Info.OrigArgs.begin());
1478-
Info.IsVarArg = false;
1479-
break;
1480-
default:
1481-
llvm_unreachable("Unexpected intrinsic call");
1482-
}
1467+
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
1468+
"Unexpected intrinsic");
1469+
return lowerChainCall(MIRBuilder, Info);
14831470
}
14841471

14851472
if (Info.IsVarArg) {

llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll

Lines changed: 0 additions & 174 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -101,29 +101,3 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
101101
%ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
102102
ret i64 %ret
103103
}
104-
105-
declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, i32 %x)
106-
107-
; Make sure we don't pass the first argument (i1).
108-
define amdgpu_cs void @call(i32 %x, ptr %p) {
109-
; CHECK-LABEL: name: call
110-
; CHECK: bb.1 (%ir-block.0):
111-
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
112-
; CHECK-NEXT: {{ $}}
113-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
114-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
115-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
116-
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
117-
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
118-
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
119-
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
120-
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
121-
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
122-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
123-
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
124-
; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
125-
; CHECK-NEXT: S_ENDPGM 0
126-
%ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, i32 %x) convergent
127-
store i32 %ret, ptr %p
128-
ret void
129-
}

0 commit comments

Comments
 (0)