Skip to content

Commit 868793f

Browse files
changpeng, rampitec, shiltian
authored
AMDGPU: Support intrinsic selection for gfx1250 wmma instructions (#148957)
Co-authored-by: Stanislav Mekhanoshin <[email protected]> Co-authored-by: Shilei Tian <[email protected]>
1 parent ae3bba4 commit 868793f

18 files changed

+6560
-24
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2919,6 +2919,20 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var"
29192919
// the form: D = A * B + C.
29202920
// A is sparse matrix, half the size of B, and is expanded using sparsity index.
29212921

2922+
// Sparse WMMA intrinsic shape that additionally carries the matrix_a_reuse and
// matrix_b_reuse flags. Operand order: A, B, C, sparsity index for A, then the
// two i1 reuse flags. The reuse flags (operands 4 and 5) must be immediates.
class AMDGPUSWmmacIntrinsicIdxReuse<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
2923+
Intrinsic<
2924+
[CD], // %D
2925+
[
2926+
A, // %A
2927+
B, // %B
2928+
LLVMMatchType<0>, // %C
2929+
Index, // %Sparsity index for A
2930+
llvm_i1_ty, // matrix_a_reuse
2931+
llvm_i1_ty, // matrix_b_reuse
2932+
],
2933+
[IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]
2934+
>;
2935+
29222936
class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
29232937
Intrinsic<
29242938
[CD], // %D
@@ -3602,6 +3616,161 @@ def int_amdgcn_fdiv_fast : DefaultAttrsIntrinsic<
36023616
[IntrNoMem, IntrSpeculatable]
36033617
>;
36043618

3619+
// WMMA intrinsics.
3620+
// WMMA intrinsic shape with an i1 modifier in front of each of A and B, no
// modifier on C, and trailing matrix_a_reuse / matrix_b_reuse flags. A and B
// share the type AB. Both modifiers (operands 0 and 2) and both reuse flags
// (operands 5 and 6) must be immediates.
class AMDGPUWmmaIntrinsicModsAB<LLVMType AB, LLVMType CD> :
3621+
Intrinsic<
3622+
[CD], // %D
3623+
[
3624+
llvm_i1_ty, // %A_mod: 0 -- none, 1 -- neg
3625+
AB, // %A
3626+
llvm_i1_ty, // %B_mod: 0 -- none, 1 -- neg
3627+
LLVMMatchType<1>, // %B
3628+
LLVMMatchType<0>, // %C
3629+
llvm_i1_ty, // matrix_a_reuse
3630+
llvm_i1_ty, // matrix_b_reuse
3631+
],
3632+
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
3633+
IntrWillReturn, IntrNoCallback, IntrNoFree]
3634+
>;
3635+
3636+
// WMMA intrinsic shape with a modifier on C only: A and B (same type AB) are
// passed unmodified, C is preceded by an i16 modifier selector, and the
// matrix_a_reuse / matrix_b_reuse flags follow. The C modifier (operand 2) and
// both reuse flags (operands 4 and 5) must be immediates.
class AMDGPUWmmaIntrinsicModsC<LLVMType AB, LLVMType CD> :
3637+
Intrinsic<
3638+
[CD], // %D
3639+
[
3640+
AB, // %A
3641+
LLVMMatchType<1>, // %B
3642+
llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
3643+
LLVMMatchType<0>, // %C
3644+
llvm_i1_ty, // matrix_a_reuse
3645+
llvm_i1_ty, // matrix_b_reuse
3646+
],
3647+
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>,
3648+
IntrWillReturn, IntrNoCallback, IntrNoFree]
3649+
>;
3650+
3651+
// WMMA intrinsic shape with independently typed A and B, an i16 modifier
// selector in front of C, and no reuse flags. Only the C modifier (operand 2)
// is required to be an immediate.
class AMDGPUWmmaIntrinsicF4ModsC<LLVMType A, LLVMType B, LLVMType CD> :
3652+
Intrinsic<
3653+
[CD], // %D
3654+
[
3655+
A, // %A
3656+
B, // %B
3657+
llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
3658+
LLVMMatchType<0>, // %C
3659+
],
3660+
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<2>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
3661+
>;
3662+
3663+
// WMMA intrinsic shape with a modifier on every input matrix: an i1 modifier
// before each of A and B (same type AB) and an i16 modifier selector before C.
// This variant has no reuse flags. All three modifiers (operands 0, 2 and 4)
// must be immediates.
class AMDGPUWmmaIntrinsicModsAll<LLVMType AB, LLVMType CD> :
3664+
Intrinsic<
3665+
[CD], // %D
3666+
[
3667+
llvm_i1_ty, // %A_mod: 0 -- none, 1 -- neg
3668+
AB, // %A
3669+
llvm_i1_ty, // %B_mod: 0 -- none, 1 -- neg
3670+
LLVMMatchType<1>, // %B
3671+
llvm_i16_ty, // %C_mod: 0 -- none, 1 -- neg, 2 -- abs, 3 -- neg(abs)
3672+
LLVMMatchType<0>, // %C
3673+
],
3674+
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
3675+
>;
3676+
3677+
// Same operand layout as the all-modifiers shape (i1 modifier before A and B,
// i16 modifier selector before C) with the matrix_a_reuse / matrix_b_reuse
// flags appended. The three modifiers (operands 0, 2 and 4) and both reuse
// flags (operands 6 and 7) must be immediates.
class AMDGPUWmmaIntrinsicModsAllReuse<LLVMType AB, LLVMType CD> :
3678+
Intrinsic<
3679+
[CD], // %D
3680+
[
3681+
llvm_i1_ty, // %A_mod: 0 -- none, 1 -- neg
3682+
AB, // %A
3683+
llvm_i1_ty, // %B_mod: 0 -- none, 1 -- neg
3684+
LLVMMatchType<1>, // %B
3685+
llvm_i16_ty, // %C_mod: 0 -- none, 1 -- neg, 2 -- abs, 3 -- neg(abs)
3686+
LLVMMatchType<0>, // %C
3687+
llvm_i1_ty, // matrix_a_reuse
3688+
llvm_i1_ty, // matrix_b_reuse
3689+
],
3690+
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
3691+
IntrWillReturn, IntrNoCallback, IntrNoFree]
3692+
>;
3693+
3694+
// D and C are of different types.
// Otherwise the operand layout is: i1 modifier + A, i1 modifier + B (B has the
// same type as A), i16 modifier selector + C, then the matrix_a_reuse and
// matrix_b_reuse flags. The three modifiers (operands 0, 2 and 4) and both
// reuse flags (operands 6 and 7) must be immediates.
3694+
class AMDGPUWmmaIntrinsicModsAllDiff<LLVMType DstTy, LLVMType AB, LLVMType C> :
3695+
Intrinsic<
3696+
[DstTy], // %D
3697+
[
3698+
llvm_i1_ty, // %A_mod: 0 -- none, 1 -- neg
3699+
AB, // %A
3700+
llvm_i1_ty, // %B_mod: 0 -- none, 1 -- neg
3701+
LLVMMatchType<1>, // %B
3702+
llvm_i16_ty, // %C_mod: 0 -- none, 1 -- neg, 2 -- abs, 3 -- neg(abs)
3703+
C, // %C
3704+
llvm_i1_ty, // matrix_a_reuse
3705+
llvm_i1_ty, // matrix_b_reuse
3706+
],
3707+
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
3708+
IntrWillReturn, IntrNoCallback, IntrNoFree]
3709+
>;
3711+
3712+
// Every WMMA intrinsic defined inside this defset is also collected into the
// list AMDGPUWMMAIntrinsicsGFX1250. Intrinsic names follow the pattern
// wmma_<result type>_<shape>_<input type(s)>.
// Only the f64 variant uses the shape without reuse flags; the fp8/bf8
// variants carry a modifier on C only, iu8 carries modifiers on A and B only.
defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = {
3713+
def int_amdgcn_wmma_f64_16x16x4_f64 : AMDGPUWmmaIntrinsicModsAll<llvm_anyfloat_ty, llvm_anyfloat_ty>;
3714+
def int_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
3715+
def int_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
3716+
def int_amdgcn_wmma_f32_16x16x32_f16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
3717+
def int_amdgcn_wmma_f16_16x16x32_f16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
3718+
def int_amdgcn_wmma_bf16_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllReuse<llvm_anyfloat_ty, llvm_anyfloat_ty>;
3719+
def int_amdgcn_wmma_bf16f32_16x16x32_bf16 : AMDGPUWmmaIntrinsicModsAllDiff<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty>;
3720+
def int_amdgcn_wmma_f32_16x16x64_fp8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3721+
def int_amdgcn_wmma_f32_16x16x64_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3722+
def int_amdgcn_wmma_f32_16x16x64_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3723+
def int_amdgcn_wmma_f32_16x16x64_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3724+
def int_amdgcn_wmma_f16_16x16x64_fp8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3725+
def int_amdgcn_wmma_f16_16x16x64_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3726+
def int_amdgcn_wmma_f16_16x16x64_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3727+
def int_amdgcn_wmma_f16_16x16x64_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3728+
def int_amdgcn_wmma_f16_16x16x128_fp8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3729+
def int_amdgcn_wmma_f16_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3730+
def int_amdgcn_wmma_f16_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3731+
def int_amdgcn_wmma_f16_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3732+
def int_amdgcn_wmma_f32_16x16x128_fp8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3733+
def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3734+
def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3735+
def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
3736+
def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
3737+
def int_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUWmmaIntrinsicF4ModsC<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty>;
3738+
}
3739+
3740+
// Sparse WMMA intrinsic shape with an i1 modifier in front of each of A and B,
// followed by C, the sparsity index for A, and the matrix_a_reuse /
// matrix_b_reuse flags. A and B are typed independently. Both modifiers
// (operands 0 and 2) and both reuse flags (operands 6 and 7) must be
// immediates.
class AMDGPUSWmmacIntrinsicABIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
3741+
Intrinsic<
3742+
[CD], // %D
3743+
[
3744+
llvm_i1_ty, // %A_mod: 0 - none, 1 - neg
3745+
A, // %A
3746+
llvm_i1_ty, // %B_mod: 0 - none, 1 - neg
3747+
B, // %B
3748+
LLVMMatchType<0>, // %C
3749+
Index, // %Sparsity index for A
3750+
llvm_i1_ty, // matrix_a_reuse
3751+
llvm_i1_ty, // matrix_b_reuse
3752+
],
3753+
[IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>]
3754+
>;
3755+
3756+
// Every sparse WMMA (swmmac) intrinsic defined inside this defset is also
// collected into the list AMDGPUSWMMACIntrinsicsGFX1250.
// The f16/bf16 and iu8 variants take A/B modifiers; the fp8/bf8 variants use
// the shape without modifiers. All of them take a sparsity index and the
// matrix reuse flags.
defset list<Intrinsic> AMDGPUSWMMACIntrinsicsGFX1250 = {
3757+
def int_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3758+
def int_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3759+
def int_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3760+
def int_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3761+
def int_amdgcn_swmmac_bf16f32_16x16x64_bf16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3762+
def int_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3763+
def int_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3764+
def int_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3765+
def int_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3766+
def int_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3767+
def int_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3768+
def int_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3769+
def int_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
3770+
def int_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
3771+
}
3772+
3773+
36053774
class AMDGPUTensorLoadStore:
36063775
Intrinsic<
36073776
[],

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ def gi_vop3pmodsneg :
5555
GIComplexOperandMatcher<s32, "selectVOP3PModsNeg">,
5656
GIComplexPatternEquiv<VOP3PModsNeg>;
5757

58+
// Pairs the VOP3PModsNegs complex pattern with the GlobalISel operand matcher
// named "selectVOP3PModsNegs" (operand type s32).
def gi_vop3pmodsnegs :
59+
GIComplexOperandMatcher<s32, "selectVOP3PModsNegs">,
60+
GIComplexPatternEquiv<VOP3PModsNegs>;
61+
62+
// Pairs the VOP3PModsNegAbs complex pattern with the GlobalISel operand matcher
// named "selectVOP3PModsNegAbs" (operand type s32).
// NOTE(review): the "dotiu" prefix does not match the neighbouring
// gi_vop3pmods* names -- confirm whether it is intentional.
def gi_dotiuvop3pmodsnegabs :
63+
GIComplexOperandMatcher<s32, "selectVOP3PModsNegAbs">,
64+
GIComplexPatternEquiv<VOP3PModsNegAbs>;
65+
5866
def gi_wmmaopselvop3pmods :
5967
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
6068
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
@@ -83,6 +91,10 @@ def gi_swmmacindex16 :
8391
GIComplexOperandMatcher<s32, "selectSWMMACIndex16">,
8492
GIComplexPatternEquiv<SWMMACIndex16>;
8593

94+
// Pairs the SWMMACIndex32 complex pattern with the GlobalISel operand matcher
// named "selectSWMMACIndex32". Unlike the 16-bit index matcher above, the
// matched operand type here is s64.
def gi_swmmacindex32 :
95+
GIComplexOperandMatcher<s64, "selectSWMMACIndex32">,
96+
GIComplexPatternEquiv<SWMMACIndex32>;
97+
8698
def gi_vop3opselmods :
8799
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
88100
GIComplexPatternEquiv<VOP3OpSelMods>;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3273,6 +3273,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
32733273
return SelectVOP3PMods(In, Src, SrcMods, true);
32743274
}
32753275

3276+
// Select neg_lo from the i1 immediate operand.
32763277
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
32773278
const ConstantSDNode *C = cast<ConstantSDNode>(In);
32783279
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
@@ -3288,6 +3289,47 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
32883289
return true;
32893290
}
32903291

3292+
// Select both neg_lo and neg_hi from the i1 immediate operand. This is
3293+
// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
3294+
// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
3295+
// \p In must be a constant node (cast<> asserts otherwise) of width i1.
// \p Src receives the resulting source-modifier mask as an i32 target constant.
// Always returns true.
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
3296+
const ConstantSDNode *C = cast<ConstantSDNode>(In);
3297+
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
3298+
// 1 requests negation: both NEG and NEG_HI are XORed into the modifiers.
// 0 leaves the modifiers at the OP_SEL_1 starting value.
3299+
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3300+
3301+
unsigned Mods = SISrcMods::OP_SEL_1;
3302+
unsigned SrcSign = C->getZExtValue();
3303+
if (SrcSign == 1)
3304+
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
3305+
3306+
Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3307+
return true;
3308+
}
3309+
3310+
// Select neg, abs, or both neg and abs from the i16 immediate operand.
3311+
// \p In must be a constant node (cast<> asserts otherwise). Its value selects
// the modifier: 1 -> NEG, 2 -> ABS, 3 -> NEG and ABS; anything else adds
// nothing. \p Src receives the resulting source-modifier mask as an i32 target
// constant. Always returns true.
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
3312+
const ConstantSDNode *C = cast<ConstantSDNode>(In);
3313+
// Start from OP_SEL_1 and XOR in the requested modifier bits.
unsigned Mods = SISrcMods::OP_SEL_1;
3314+
unsigned SrcMod = C->getZExtValue();
3315+
switch (SrcMod) {
3316+
default: // Any other value will be silently ignored (considered as 0).
3317+
break;
3318+
case 1:
3319+
Mods ^= SISrcMods::NEG;
3320+
break;
3321+
case 2:
3322+
Mods ^= SISrcMods::ABS;
3323+
break;
3324+
case 3:
3325+
Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
3326+
break;
3327+
}
3328+
3329+
Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3330+
return true;
3331+
}
3332+
32913333
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
32923334
SDValue &Src) const {
32933335
const ConstantSDNode *C = cast<ConstantSDNode>(In);
@@ -3639,6 +3681,41 @@ bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
36393681
return true;
36403682
}
36413683

3684+
// Selects the index operand and its index key.
//
// By default \p Src is \p In itself and \p IndexKey is 0. If \p In is a 32-bit
// value that was widened, and that 32-bit value is element 1 extracted from a
// 64-bit vector, \p Src becomes that 64-bit vector and \p IndexKey becomes 1.
// \p IndexKey is emitted as an i32 target constant. Always returns true.
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
3685+
SDValue &IndexKey) const {
3686+
unsigned Key = 0;
3687+
Src = In;
3688+
3689+
// The 32-bit value underneath the widening, if one is recognized below.
SDValue InI32;
3690+
3691+
// Widening form 1: any-extend or zero-extend of a 32-bit value.
if (In.getOpcode() == ISD::ANY_EXTEND || In.getOpcode() == ISD::ZERO_EXTEND) {
3692+
const SDValue &ExtendSrc = In.getOperand(0);
3693+
if (ExtendSrc.getValueSizeInBits() == 32)
3694+
InI32 = ExtendSrc;
3695+
// Widening form 2: bitcast of a build_vector whose first operand is 32 bits
// wide and whose second operand is the constant 0.
} else if (In->getOpcode() == ISD::BITCAST) {
3696+
const SDValue &CastSrc = In.getOperand(0);
3697+
if (CastSrc.getOpcode() == ISD::BUILD_VECTOR &&
3698+
CastSrc.getOperand(0).getValueSizeInBits() == 32) {
3699+
ConstantSDNode *Zero = dyn_cast<ConstantSDNode>(CastSrc.getOperand(1));
3700+
if (Zero && Zero->getZExtValue() == 0)
3701+
InI32 = CastSrc.getOperand(0);
3702+
}
3703+
}
3704+
3705+
// Only an extract of element 1 from a 64-bit vector is folded into the key;
// an extract of element 0 keeps the defaults (Src = In, Key = 0).
if (InI32 && InI32.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3706+
const SDValue &ExtractVecEltSrc = InI32.getOperand(0);
3707+
ConstantSDNode *EltIdx = dyn_cast<ConstantSDNode>(InI32.getOperand(1));
3708+
if (ExtractVecEltSrc.getValueSizeInBits() == 64 && EltIdx &&
3709+
EltIdx->getZExtValue() == 1) {
3710+
Key = 1;
3711+
Src = ExtractVecEltSrc;
3712+
}
3713+
}
3714+
3715+
IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3716+
return true;
3717+
}
3718+
36423719
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
36433720
SDValue &SrcMods) const {
36443721
Src = In;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
222222
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
223223

224224
bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
225+
// Builds a modifier mask with both NEG and NEG_HI from an i1 immediate \p In.
bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const;
226+
// Builds a NEG/ABS modifier mask from an immediate selector \p In
// (1 = neg, 2 = abs, 3 = both).
bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const;
225227
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
226228

227229
bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
@@ -233,6 +235,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
233235

234236
bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const;
235237
bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const;
238+
// Selects the index operand \p Src and its index key \p IndexKey (0 or 1).
bool SelectSWMMACIndex32(SDValue In, SDValue &Src, SDValue &IndexKey) const;
236239

237240
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
238241

0 commit comments

Comments
 (0)