Merged

34 commits:
49f9f87
Add subtarget feature
rovka Jan 24, 2025
64a3d2f
[AMDGPU] ISel & PEI for whole wave functions
rovka Jan 27, 2025
47594fd
Use MF instead of MBB
rovka Mar 17, 2025
08ef43e
Revert "Add subtarget feature"
rovka Mar 11, 2025
02d7aec
Add new CC. Do nothing
rovka Mar 19, 2025
e4b378f
Replace SubtargetFeature with CallingConv
rovka Mar 11, 2025
97ba693
Enable gisel in tests
rovka Mar 17, 2025
7b44133
GISel support
rovka Mar 11, 2025
2d2f85b
Rename pseudo to match others
rovka Mar 19, 2025
fb6d20a
Rename CC
rovka Mar 25, 2025
81adaba
Fix formatting
rovka Mar 25, 2025
9931578
Update tests after merge
rovka May 6, 2025
7b75dff
Fix bug in testcase
rovka May 6, 2025
de8c395
Test inreg args
rovka May 19, 2025
5b5e137
Add docs and fixme
rovka May 20, 2025
1123374
Remove kill flags on orig exec mask
rovka Jun 17, 2025
339d2c7
Add helper to add orig exec to return
rovka Jun 23, 2025
154c430
Test with single use of orig exec
rovka Jun 23, 2025
843136a
Test calling gfx func from wwf
rovka Jun 23, 2025
9e3d9f2
Test wave64
rovka Jun 24, 2025
f6f9337
Fix a few missed spots
rovka Jun 24, 2025
639fb8c
clang-format
rovka Jun 25, 2025
f19a8df
Fix CC in test
rovka Jun 27, 2025
846aa2b
Verifier checks for whole wave funcs
rovka Jun 27, 2025
9bed239
[AMDGPU] Intrinsic for launching whole wave functions
rovka Jan 24, 2025
974e0fb
Remove Verifier check that I moved to previous PR
rovka Jun 27, 2025
8dc9461
Remove embarrassing cast
rovka Jun 27, 2025
f487cb4
Address review comments
rovka Jul 21, 2025
b104da3
Merge remote-tracking branch 'remotes/origin/main' into users/rovka/w…
rovka Jul 21, 2025
3e6b02c
Merge branch 'main' into users/rovka/whole-wave-funcs-call
rovka Jul 22, 2025
197b56d
Fixup merge mishap
rovka Jul 22, 2025
906f8ca
Merge remote-tracking branch 'remotes/origin/main' into users/rovka/w…
rovka Jul 29, 2025
7a850d8
s/size != 0/empty
rovka Jul 29, 2025
6d9c46f
Address review comments
rovka Aug 5, 2025
14 changes: 14 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
@@ -1844,6 +1844,20 @@ The AMDGPU backend supports the following calling conventions:
..TODO::
Describe.

``amdgpu_gfx_whole_wave`` Used for AMD graphics targets. Functions with this calling convention
cannot be used as entry points. They must have an i1 as the first argument,
which will be mapped to the value of EXEC on entry into the function. Other
arguments will contain poison in their inactive lanes. Similarly, the return
value for the inactive lanes is poison.

The function will run with all lanes enabled, i.e. EXEC will be set to -1 in the
prologue and restored to its original value in the epilogue. The inactive lanes
will be preserved for all the registers used by the function. For the active
lanes, only the callee-saved registers will be preserved.

In all other respects, functions with this calling convention behave like
``amdgpu_gfx`` functions.
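
A minimal illustrative sketch (not part of the patch; the function name and body
are invented for the example) of what a whole wave function can look like in IR:

```llvm
; Hypothetical whole wave function: %active receives the caller's EXEC mask,
; one bit per lane, while the body itself executes with all lanes enabled.
define amdgpu_gfx_whole_wave i32 @ww_sum(i1 %active, i32 %x, i32 %y) {
  %sum = add i32 %x, %y
  ; Only lanes that were active at the call site produce a meaningful result;
  ; the return value in the inactive lanes is poison anyway.
  %res = select i1 %active, i32 %sum, i32 0
  ret i32 %res
}
```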

``amdgpu_gs`` Used for Mesa/AMDPAL geometry shaders.
..TODO::
Describe.
1 change: 1 addition & 0 deletions llvm/include/llvm/AsmParser/LLToken.h
@@ -181,6 +181,7 @@ enum Kind {
kw_amdgpu_cs_chain_preserve,
kw_amdgpu_kernel,
kw_amdgpu_gfx,
kw_amdgpu_gfx_whole_wave,
kw_tailcc,
kw_m68k_rtdcc,
kw_graalcc,
8 changes: 8 additions & 0 deletions llvm/include/llvm/IR/CallingConv.h
@@ -284,6 +284,9 @@ namespace CallingConv {
RISCV_VLSCall_32768 = 122,
RISCV_VLSCall_65536 = 123,

// Calling convention for AMDGPU whole wave functions.
AMDGPU_Gfx_WholeWave = 124,

/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
@@ -294,8 +297,13 @@ namespace CallingConv {
/// directly or indirectly via a call-like instruction.
constexpr bool isCallableCC(CallingConv::ID CC) {
switch (CC) {
// Called with special intrinsics:
// llvm.amdgcn.cs.chain
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
// llvm.amdgcn.call.whole.wave
case CallingConv::AMDGPU_Gfx_WholeWave:
Contributor: Ideally would introduce this new calling convention as a separate patch; this needs separate bitcode compatibility tests, and should get its own set of verifier checks for no address capture / only use is the intrinsic call

Collaborator (Author): Yeah, that's in the previous patch in this stack. I've added some more tests like you requested :)

// Hardware entry points:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_GS:
12 changes: 12 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2586,6 +2586,18 @@ def int_amdgcn_cs_chain:
],
[IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;

// Run a function with all the lanes enabled. Only direct calls are allowed. The
// first argument is the callee, which must have the `amdgpu_gfx_whole_wave`
// calling convention and must not be variadic. The remaining arguments to the
// callee are taken from the arguments passed to the intrinsic. Lanes that are
// inactive at the point of the call will receive poison. The return value is
// the return value of the callee for the active lanes and poison for the
// inactive ones.
def int_amdgcn_call_whole_wave:
Intrinsic<[llvm_any_ty], // The return type of the callee.
[llvm_anyptr_ty, // The callee.
llvm_vararg_ty], // The arguments to the callee.
[IntrConvergent, IntrNoReturn, IntrNoCallback, IntrNoFree]>;
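
As an illustrative sketch (not part of the patch), launching a whole wave
function from ordinary IR might look roughly as follows; @ww_sum and @caller are
invented names, and the exact textual mangling of the overloaded intrinsic name
may differ:

```llvm
declare amdgpu_gfx_whole_wave i32 @ww_sum(i1, i32, i32)

define amdgpu_gfx i32 @caller(i32 %x, i32 %y) {
  ; The callee's leading i1 argument is not passed explicitly; the backend
  ; derives it from the caller's EXEC mask at the call site.
  %r = call i32 (ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @ww_sum, i32 %x, i32 %y)
  ret i32 %r
}
```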

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
1 change: 1 addition & 0 deletions llvm/lib/AsmParser/LLLexer.cpp
@@ -679,6 +679,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_cs_chain_preserve);
KEYWORD(amdgpu_kernel);
KEYWORD(amdgpu_gfx);
KEYWORD(amdgpu_gfx_whole_wave);
KEYWORD(tailcc);
KEYWORD(m68k_rtdcc);
KEYWORD(graalcc);
3 changes: 3 additions & 0 deletions llvm/lib/AsmParser/LLParser.cpp
@@ -2272,6 +2272,9 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
CC = CallingConv::AMDGPU_CS_ChainPreserve;
break;
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
case lltok::kw_amdgpu_gfx_whole_wave:
CC = CallingConv::AMDGPU_Gfx_WholeWave;
break;
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_m68k_rtdcc: CC = CallingConv::M68k_RTD; break;
case lltok::kw_graalcc: CC = CallingConv::GRAAL; break;
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2548,6 +2548,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
return true;
case Intrinsic::amdgcn_cs_chain:
case Intrinsic::amdgcn_call_whole_wave:
return translateCallBase(CI, MIRBuilder);
case Intrinsic::fptrunc_round: {
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
37 changes: 37 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7997,6 +7997,43 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
HasTailCall = true;
return;
}
case Intrinsic::amdgcn_call_whole_wave: {
TargetLowering::ArgListTy Args;

// The first argument is the callee. Skip it when assembling the call args.
TargetLowering::ArgListEntry Arg;
for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
Arg.Node = getValue(I.getArgOperand(Idx));
Arg.Ty = I.getArgOperand(Idx)->getType();
Arg.setAttributes(&I, Idx);
Args.push_back(Arg);
}

SDValue ConvControlToken;
if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
ConvControlToken = getValue(Token);
}

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
getValue(I.getArgOperand(0)), std::move(Args))
.setTailCall(false)
.setIsPreallocated(
I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
.setConvergent(I.isConvergent())
.setConvergenceControlToken(ConvControlToken);
CLI.CB = &I;

std::pair<SDValue, SDValue> Result =
lowerInvokable(CLI, /*EHPadBB*/ nullptr);

if (Result.first.getNode())
setValue(&I, Result.first);
return;
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Mask = getValue(I.getOperand(1));
3 changes: 3 additions & 0 deletions llvm/lib/IR/AsmWriter.cpp
@@ -404,6 +404,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
break;
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
case CallingConv::AMDGPU_Gfx_WholeWave:
Out << "amdgpu_gfx_whole_wave";
break;
case CallingConv::M68k_RTD: Out << "m68k_rtdcc"; break;
case CallingConv::RISCV_VectorCall:
Out << "riscv_vector_cc";
1 change: 1 addition & 0 deletions llvm/lib/IR/Function.cpp
@@ -1232,6 +1232,7 @@ bool llvm::CallingConv::supportsNonVoidReturnType(CallingConv::ID CC) {
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::WASM_EmscriptenInvoke:
case CallingConv::AMDGPU_Gfx:
case CallingConv::AMDGPU_Gfx_WholeWave:
case CallingConv::M68k_INTR:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
40 changes: 40 additions & 0 deletions llvm/lib/IR/Verifier.cpp
@@ -2978,6 +2978,16 @@ void Verifier::visitFunction(const Function &F) {
"perfect forwarding!",
&F);
break;
case CallingConv::AMDGPU_Gfx_WholeWave:
Check(F.arg_size() != 0 && F.arg_begin()->getType()->isIntegerTy(1),
"Calling convention requires first argument to be i1", &F);
Check(!F.arg_begin()->hasInRegAttr(),
"Calling convention requires first argument to not be inreg", &F);
Check(!F.isVarArg(),
"Calling convention does not support varargs or "
"perfect forwarding!",
&F);
break;
}

// Check that the argument values match the function type for this function...
@@ -6571,6 +6581,36 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"Value for inactive lanes must be a VGPR function argument", &Call);
break;
}
case Intrinsic::amdgcn_call_whole_wave: {
auto F = dyn_cast<Function>(Call.getArgOperand(0));
Check(F, "Indirect whole wave calls are not allowed", &Call);

CallingConv::ID CC = F->getCallingConv();
Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
"Callee must have the amdgpu_gfx_whole_wave calling convention",
&Call);

Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);

Check(Call.arg_size() == F->arg_size(),
"Call argument count must match callee argument count", &Call);

// The first argument of the call is the callee, and the first argument of
// the callee is the active mask. The rest of the arguments must match.
Check(F->arg_begin()->getType()->isIntegerTy(1),
"Callee must have i1 as its first argument", &Call);
for (auto [CallArg, FuncArg] :
drop_begin(zip_equal(Call.args(), F->args()))) {
Check(CallArg->getType() == FuncArg.getType(),
"Argument types must match", &Call);

// Check that inreg attributes match between call site and function
Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
FuncArg.hasInRegAttr(),
"Argument inreg attributes must match", &Call);
}
break;
}
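
As a hedged example of what these checks reject (hypothetical names), passing
the callee indirectly fails the first check above:

```llvm
define amdgpu_gfx i32 @bad_indirect(ptr %callee, i32 %x) {
  ; Rejected: "Indirect whole wave calls are not allowed" -- the callee operand
  ; must be a direct reference to an amdgpu_gfx_whole_wave function.
  %r = call i32 (ptr, ...) @llvm.amdgcn.call.whole.wave(ptr %callee, i32 %x)
  ret i32 %r
}
```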
case Intrinsic::amdgcn_s_prefetch_data: {
Check(
AMDGPU::isFlatGlobalAddrSpace(
52 changes: 46 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -374,15 +374,21 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
return true;
}

unsigned ReturnOpc =
IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
const bool IsWholeWave = MFI->isWholeWaveFunction();
unsigned ReturnOpc = IsWholeWave ? AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN
: IsShader ? AMDGPU::SI_RETURN_TO_EPILOG
: AMDGPU::SI_RETURN;
auto Ret = B.buildInstrNoInsert(ReturnOpc);

if (!FLI.CanLowerReturn)
insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
else if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;

if (IsWholeWave) {
addOriginalExecToReturn(B.getMF(), Ret);
}

// TODO: Handle CalleeSavedRegsViaCopy.

B.insertInstr(Ret);
@@ -632,6 +638,17 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if (DL.getTypeStoreSize(Arg.getType()) == 0)
continue;

if (Info->isWholeWaveFunction() && Idx == 0) {
assert(VRegs[Idx].size() == 1 && "Expected only one register");

// The first argument for whole wave functions is the original EXEC value.
B.buildInstr(AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
.addDef(VRegs[Idx][0]);

++Idx;
continue;
}

const bool InReg = Arg.hasAttribute(Attribute::InReg);

if (Arg.hasAttribute(Attribute::SwiftSelf) ||
@@ -1347,6 +1364,7 @@ bool AMDGPUCallLowering::lowerTailCall(
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;

if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
Info.CallConv != CallingConv::AMDGPU_Gfx_WholeWave &&
!AMDGPU::isChainCC(Info.CallConv)) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
@@ -1447,9 +1465,22 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
if (Function *F = Info.CB->getCalledFunction())
if (F->isIntrinsic()) {
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
"Unexpected intrinsic");
return lowerChainCall(MIRBuilder, Info);
switch (F->getIntrinsicID()) {
case Intrinsic::amdgcn_cs_chain:
return lowerChainCall(MIRBuilder, Info);
case Intrinsic::amdgcn_call_whole_wave:
Info.CallConv = CallingConv::AMDGPU_Gfx_WholeWave;

// Get the callee from the original instruction, so it doesn't look like
// this is an indirect call.
Info.Callee = MachineOperand::CreateGA(
cast<GlobalValue>(Info.CB->getOperand(0)), /*Offset=*/0);
Info.OrigArgs.erase(Info.OrigArgs.begin());
Info.IsVarArg = false;
break;
default:
llvm_unreachable("Unexpected intrinsic call");
}
}

if (Info.IsVarArg) {
@@ -1524,7 +1555,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;

if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
Info.CallConv != CallingConv::AMDGPU_Gfx_WholeWave) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;
@@ -1592,3 +1624,11 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,

return true;
}

void AMDGPUCallLowering::addOriginalExecToReturn(
MachineFunction &MF, MachineInstrBuilder &Ret) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const MachineInstr *Setup = TII->getWholeWaveFunctionSetup(MF);
Ret.addReg(Setup->getOperand(0).getReg());
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -37,6 +37,9 @@ class AMDGPUCallLowering final : public CallLowering {
bool lowerReturnVal(MachineIRBuilder &B, const Value *Val,
ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;

void addOriginalExecToReturn(MachineFunction &MF,
MachineInstrBuilder &Ret) const;

public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);

4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -312,6 +312,10 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_PREFETCH, SIsbuffer_prefetch>;

def : GINodeEquiv<G_AMDGPU_WHOLE_WAVE_FUNC_SETUP, AMDGPUwhole_wave_setup>;
// G_AMDGPU_WHOLE_WAVE_FUNC_RETURN is simpler than AMDGPUwhole_wave_return,
// so we don't mark it as equivalent.

class GISelSop2Pat <
SDPatternOperator node,
Instruction inst,
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1143,6 +1143,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::Cold:
return CC_AMDGPU_Func;
case CallingConv::AMDGPU_Gfx:
case CallingConv::AMDGPU_Gfx_WholeWave:
return CC_SI_Gfx;
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
@@ -1168,6 +1169,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
case CallingConv::AMDGPU_LS:
return RetCC_SI_Shader;
case CallingConv::AMDGPU_Gfx:
case CallingConv::AMDGPU_Gfx_WholeWave:
return RetCC_SI_Gfx;
case CallingConv::C:
case CallingConv::Fast:
@@ -5875,6 +5877,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
NODE_NAME_CASE(WHOLE_WAVE_SETUP)
NODE_NAME_CASE(WHOLE_WAVE_RETURN)
}
return nullptr;
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -608,6 +608,12 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_FMAX,
BUFFER_ATOMIC_COND_SUB_U32,
LAST_MEMORY_OPCODE = BUFFER_ATOMIC_COND_SUB_U32,

// Set up a whole wave function.
WHOLE_WAVE_SETUP,

// Return from a whole wave function.
WHOLE_WAVE_RETURN,
};

} // End namespace AMDGPUISD
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -348,6 +348,17 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",

def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;

// Marks the entry into a whole wave function.
def AMDGPUwhole_wave_setup : SDNode<
"AMDGPUISD::WHOLE_WAVE_SETUP", SDTypeProfile<1, 0, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPSideEffect]>;

// Marks the return from a whole wave function.
def AMDGPUwhole_wave_return : SDNode<
"AMDGPUISD::WHOLE_WAVE_RETURN", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
SDTCisInt<0>, // i8 tgt
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4160,6 +4160,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return true;
case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
return selectWaveAddress(I);
case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
return true;
}
case AMDGPU::G_STACKRESTORE:
return selectStackRestore(I);
case AMDGPU::G_PHI: