Skip to content

Commit 6ac0abf

Browse files
authored
[AMDGPU] gfx1251 VOP3 dpp support (#159654)
1 parent 9d6062c commit 6ac0abf

13 files changed

+745
-53
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1969,6 +1969,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
19691969
RegisterOperand ret =
19701970
!cond(!eq(VT, i1) : SSrc_i1,
19711971
!eq(VT, i16) : !if (IsFake16, VCSrc_b16, VCSrcT_b16),
1972+
!eq(VT, i64) : VCSrc_b64,
19721973
!eq(VT, f16) : !if (IsFake16, VCSrc_f16, VCSrcT_f16),
19731974
!eq(VT, bf16) : !if (IsFake16, VCSrc_bf16, VCSrcT_bf16),
19741975
!eq(VT, v2i16) : VCSrc_v2b16,

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 48 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
2424
}
2525
def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
2626
let Outs64 = (outs DstRC.RegClass:$vdst);
27+
let HasExt64BitDPP = 1;
2728
let IsSingle = 1;
2829
}
2930
}
@@ -51,7 +52,24 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
5152

5253
let HasExt64BitDPP = 1 in {
5354
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
54-
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
55+
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
56+
let OutsVOP3DPP = Outs64;
57+
let AsmVOP3DPP = getAsmVOP3DPP<Asm64>.ret;
58+
let AsmVOP3DPP16 = getAsmVOP3DPP16<Asm64>.ret;
59+
let AsmVOP3DPP8 = getAsmVOP3DPP8<Asm64>.ret;
60+
}
61+
62+
def VOP3b_I64_I1_I32_I32_I64_DPP : VOPProfile<[i64, i32, i32, i64]> {
63+
let HasClamp = 1;
64+
65+
let IsSingle = 1;
66+
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
67+
let OutsVOP3DPP = Outs64;
68+
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
69+
let AsmVOP3DPP = getAsmVOP3DPP<Asm64>.ret;
70+
let AsmVOP3DPP16 = getAsmVOP3DPP16<Asm64>.ret;
71+
let AsmVOP3DPP8 = getAsmVOP3DPP8<Asm64>.ret;
72+
}
5573

5674
class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
5775
let HasExtVOP3DPP = 0;
@@ -229,7 +247,7 @@ defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32
229247
// result *= 2^64
230248
//
231249
let SchedRW = [WriteDouble], FPDPRounding = 1 in
232-
defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>;
250+
defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
233251
} // End Uses = [MODE, VCC, EXEC]
234252

235253
} // End isCommutable = 1
@@ -294,7 +312,7 @@ defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_
294312
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
295313

296314
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
297-
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
315+
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP_F64_F64_F64_F64_DPP_PROF, AMDGPUdiv_fixup>;
298316
defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>;
299317
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
300318
} // End isReMaterializable = 1
@@ -335,7 +353,7 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d
335353

336354
// Double precision division pre-scale.
337355
let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
338-
defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
356+
defm V_DIV_SCALE_F64 : VOP3Inst <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
339357
} // End mayRaiseFPException = 0
340358

341359
let isReMaterializable = 1 in
@@ -408,9 +426,9 @@ defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
408426
} // End SubtargetPredicate = isGFX7Plus
409427

410428
let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
411-
let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
412-
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
413-
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
429+
let SubtargetPredicate = isGFX7Plus in {
430+
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug]>;
431+
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug]>;
414432
}
415433
let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug],
416434
Constraints = "@earlyclobber $vdst" in {
@@ -2054,8 +2072,8 @@ defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>;
20542072
defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>;
20552073
defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
20562074
defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
2057-
defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
2058-
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
2075+
defm V_MINIMUM_F64 : VOP3Only_Realtriple_gfx11_gfx12<0x341>;
2076+
defm V_MAXIMUM_F64 : VOP3Only_Realtriple_gfx11_gfx12<0x342>;
20592077
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
20602078
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
20612079
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
@@ -2127,6 +2145,13 @@ multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
21272145
VOP3be_Real<GFX11Gen, op, opName, asmName>,
21282146
VOP3be_Real<GFX12Gen, op, opName, asmName>;
21292147

2148+
multiclass VOP3be_Real_gfx11_gfx12_not_gfx1250<bits<10> op, string opName, string asmName> :
2149+
VOP3be_Real<GFX11Gen, op, opName, asmName>,
2150+
VOP3be_Real<GFX12Not12_50Gen, op, opName, asmName>;
2151+
2152+
multiclass VOP3be_Realtriple_gfx1250<bits<10> op> :
2153+
VOP3be_Realtriple<GFX1250Gen, op>;
2154+
21302155
multiclass VOP3_Real_No_Suffix_gfx11_gfx12<bits<10> op> :
21312156
VOP3_Real_No_Suffix<GFX11Gen, op>, VOP3_Real_No_Suffix<GFX12Gen, op>;
21322157

@@ -2141,7 +2166,7 @@ defm V_BFE_U32 : VOP3_Realtriple_gfx11_gfx12<0x210>;
21412166
defm V_BFE_I32 : VOP3_Realtriple_gfx11_gfx12<0x211>;
21422167
defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
21432168
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
2144-
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
2169+
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x214>;
21452170
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
21462171
defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x216, "v_alignbit_b32">;
21472172
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">;
@@ -2161,9 +2186,9 @@ defm V_SAD_U16 : VOP3_Realtriple_gfx11_gfx12<0x224>;
21612186
defm V_SAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x225>;
21622187
defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11_gfx12<0x226>;
21632188
defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11_gfx12<0x227>;
2164-
defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12<0x228>;
2189+
defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x228>;
21652190
defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11_gfx12<0x237>;
2166-
defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12<0x238>;
2191+
defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x238>;
21672192
defm V_MSAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x239>;
21682193
defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>;
21692194
defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>;
@@ -2205,7 +2230,7 @@ defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>;
22052230
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
22062231
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
22072232
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
2208-
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
2233+
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12_not_gfx1250<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
22092234
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
22102235
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
22112236
defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
@@ -2228,7 +2253,7 @@ defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;
22282253
defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
22292254
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
22302255
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
2231-
defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>;
2256+
defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32b>;
22322257
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>;
22332258
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>;
22342259
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>;
@@ -2237,8 +2262,8 @@ defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33
22372262
defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
22382263
defm V_ASHRREV_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
22392264
defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>;
2240-
defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12<0x33d>;
2241-
defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12<0x33e>;
2265+
defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x33d>;
2266+
defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x33e>;
22422267
defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2
22432268
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
22442269
defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
@@ -2260,9 +2285,16 @@ let AssemblerPredicate = isGFX11Plus in {
22602285
}
22612286

22622287
// These instructions differ from GFX12 variant by supporting DPP:
2288+
defm V_FMA_F64 : VOP3Only_Realtriple_gfx1250<0x214>;
2289+
defm V_DIV_FIXUP_F64 : VOP3Only_Realtriple_gfx1250<0x228>;
2290+
defm V_DIV_FMAS_F64 : VOP3Only_Realtriple_gfx1250<0x238>;
2291+
defm V_DIV_SCALE_F64 : VOP3be_Realtriple_gfx1250<0x2fd>;
2292+
defm V_LDEXP_F64 : VOP3Only_Realtriple_gfx1250<0x32b>;
22632293
defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>;
22642294
defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>;
22652295
defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>;
2296+
defm V_LSHRREV_B64 : VOP3Only_Realtriple_gfx1250<0x33d>;
2297+
defm V_ASHRREV_I64 : VOP3Only_Realtriple_gfx1250<0x33e>;
22662298

22672299
defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
22682300
defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 43 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1041,8 +1041,9 @@ class VOP3_DPP_Pseudo <string OpName, VOPProfile P> :
10411041
let Size = 12;
10421042
let VOP3 = 1;
10431043
let AsmMatchConverter = "cvtVOP3DPP";
1044-
let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
1045-
AMDGPUAsmVariants.Disable);
1044+
let AsmVariantName = !if(!or(P.HasExtVOP3DPP, P.HasExt64BitDPP),
1045+
AMDGPUAsmVariants.VOP3_DPP,
1046+
AMDGPUAsmVariants.Disable);
10461047
}
10471048

10481049
class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
@@ -1115,8 +1116,9 @@ class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16,
11151116
let OutOperandList = P.OutsVOP3DPP;
11161117
let AsmMatchConverter = "cvtVOP3DPP";
11171118
let VOP3 = 1;
1118-
let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
1119-
AMDGPUAsmVariants.Disable);
1119+
let AsmVariantName = !if(!or(P.HasExtVOP3DPP, P.HasExt64BitDPP),
1120+
AMDGPUAsmVariants.VOP3_DPP,
1121+
AMDGPUAsmVariants.Disable);
11201122
let Size = 12;
11211123
}
11221124

@@ -1855,10 +1857,12 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
18551857
}
18561858
}
18571859
}
1858-
def Gen.Suffix#"_VOP3_alias" : LetDummies,
1859-
AMDGPUMnemonicAlias<!if(!empty(pseudo_mnemonic),
1860-
ps.Mnemonic, pseudo_mnemonic), asmName, ""> {
1861-
let AssemblerPredicate = Gen.AssemblerPredicate;
1860+
if !ne(ps.Mnemonic, asmName) then {
1861+
def Gen.Suffix#"_VOP3_alias" : LetDummies,
1862+
AMDGPUMnemonicAlias<!if(!empty(pseudo_mnemonic),
1863+
ps.Mnemonic, pseudo_mnemonic), asmName, ""> {
1864+
let AssemblerPredicate = Gen.AssemblerPredicate;
1865+
}
18621866
}
18631867
}
18641868

@@ -1902,33 +1906,36 @@ multiclass VOP3_Real_dpp_with_name<GFXGen Gen, bits<10> op, string opName,
19021906

19031907
multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
19041908
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1905-
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
1906-
let DecoderNamespace = Gen.DecoderNamespace;
1907-
let AssemblerPredicate = Gen.AssemblerPredicate;
1908-
}
1909+
if !not(ps.Pfl.HasExt64BitDPP) then
1910+
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
1911+
let DecoderNamespace = Gen.DecoderNamespace;
1912+
let AssemblerPredicate = Gen.AssemblerPredicate;
1913+
}
19091914
}
19101915

19111916
multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
19121917
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1913-
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> {
1914-
let Inst{11} = ?;
1915-
let Inst{12} = ?;
1916-
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
1917-
let DecoderNamespace = Gen.DecoderNamespace
1918-
# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
1919-
let AssemblerPredicate = Gen.AssemblerPredicate;
1920-
}
1918+
// Keep the Base_VOP3_DPP8_t16 base of the definition being replaced; the only intended change here is the HasExt64BitDPP guard.
if !not(ps.Pfl.HasExt64BitDPP) then
1919+
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> {
1920+
let Inst{11} = ?;
1921+
let Inst{12} = ?;
1922+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
1923+
let DecoderNamespace = Gen.DecoderNamespace
1924+
# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
1925+
let AssemblerPredicate = Gen.AssemblerPredicate;
1926+
}
19211927
}
19221928

19231929
multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName,
19241930
string asmName> {
19251931
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1926-
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
1927-
DecoderNamespace = Gen.DecoderNamespace#
1928-
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
1929-
True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
1930-
NoTrue16Predicate) in {
1931-
defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>;
1932+
if !not(ps.Pfl.HasExt64BitDPP) then
1933+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
1934+
DecoderNamespace = Gen.DecoderNamespace#
1935+
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
1936+
True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
1937+
NoTrue16Predicate) in {
1938+
defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>;
19321939
}
19331940
}
19341941

@@ -1955,10 +1962,11 @@ multiclass VOP3be_Real_dpp<GFXGen Gen, bits<10> op, string opName,
19551962
multiclass VOP3be_Real_dpp8<GFXGen Gen, bits<10> op, string opName,
19561963
string asmName> {
19571964
defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
1958-
def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base<op, ps, asmName> {
1959-
let DecoderNamespace = Gen.DecoderNamespace;
1960-
let AssemblerPredicate = Gen.AssemblerPredicate;
1961-
}
1965+
if !not(ps.Pfl.HasExt64BitDPP) then
1966+
def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base<op, ps, asmName> {
1967+
let DecoderNamespace = Gen.DecoderNamespace;
1968+
let AssemblerPredicate = Gen.AssemblerPredicate;
1969+
}
19621970
}
19631971

19641972
// VOP1 and VOP2 depend on these triple defs
@@ -2105,6 +2113,9 @@ multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
21052113
multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
21062114
VOP3_Realtriple<GFX1250Gen, op, isSingle>;
21072115

2116+
multiclass VOP3Only_Realtriple_gfx12_not_gfx1250<bits<10> op, bit isSingle = 0> :
2117+
VOP3_Realtriple<GFX12Not12_50Gen, op, isSingle>;
2118+
21082119
multiclass VOP3Only_Realtriple_with_name_gfx1250<bits<10> op, string opName,
21092120
string asmName, string pseudo_mnemonic = "",
21102121
bit isSingle = 0> :
@@ -2144,11 +2155,8 @@ multiclass VOP3Only_Realtriple_t16_and_fake16_gfx1250<bits<10> op,
21442155
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
21452156
string asmName, bit isSingle = 0> {
21462157
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
2147-
let AsmString = asmName # ps.AsmOperands,
2148-
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in
2149-
def _e64_gfx12 :
2150-
VOP3_Real_Gen<ps, GFX12Gen, asmName>,
2151-
VOP3be_gfx11_gfx12<op, ps.Pfl>;
2158+
defm NAME : VOP3be_Realtriple<GFX12Gen, op, !or(isSingle, ps.Pfl.IsSingle),
2159+
opName, asmName>;
21522160
def : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
21532161
let AssemblerPredicate = GFX12Gen.AssemblerPredicate;
21542162
}

llvm/test/CodeGen/AMDGPU/dpp64_combine.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10 -DCTL=row_share
44
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11 -DCTL=row_share
55
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX1250 -DCTL=row_share
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,DPP64-GFX1251 -DCTL=row_share
67

78
; GCN-LABEL: {{^}}dpp64_ceil:
89
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
@@ -23,6 +24,8 @@ define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) {
2324
; GCN-LABEL: {{^}}dpp64_rcp:
2425
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
2526
; DPP64-GFX9: v_rcp_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
27+
; DPP64-GFX1251: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
28+
; DPP64-GFX1251: v_rcp_f64_e32
2629
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
2730
define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) {
2831
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -79,6 +82,7 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
7982
; GFX1250: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]]
8083
; GFX1250: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
8184
; GFX1250: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}}
85+
; DPP64-GFX1251: v_mul_lo_u32_e64_dpp [[V]], [[V]], [[V]] [[CTL]]:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
8286
define amdgpu_kernel void @dpp_mul_row_share(ptr addrspace(1) %arg) {
8387
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
8488
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id

0 commit comments

Comments
 (0)