Skip to content

Commit e56658e

Browse files
changpeng and rampitec committed
[AMDGPU] V_PK_ADD_{MIN|MAX}_{I|U}16 and V_PK_{MIN|MAX}3_{I|U}16 MC support on gfx1250
Co-Authored-by: Stanislav Mekhanoshin <[email protected]>
1 parent c267928 commit e56658e

File tree

6 files changed

+908
-0
lines changed

6 files changed

+908
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2519,6 +2519,14 @@ def HasFmaakFmamkF64Insts :
25192519
Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
25202520
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
25212521

2522+
// Predicate for the V_PK_ADD_{MIN|MAX}_{I|U}16 instructions: codegen queries
// Subtarget->hasPkAddMinMaxInsts(); the assembler predicate is satisfied by
// FeatureGFX1250Insts.
def HasPkAddMinMaxInsts :
2523+
Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
2524+
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
2525+
2526+
// Predicate for the V_PK_{MIN|MAX}3_{I|U}16 instructions: codegen queries
// Subtarget->hasPkMinMax3Insts(); the assembler predicate is satisfied by
// FeatureGFX1250Insts.
def HasPkMinMax3Insts :
2527+
Predicate<"Subtarget->hasPkMinMax3Insts()">,
2528+
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
2529+
25222530
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
25232531
AssemblerPredicate<(all_of FeatureImageInsts)>;
25242532

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,6 +1500,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
15001500

15011501
bool hasVOPD3() const { return GFX1250Insts; }
15021502

1503+
// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
// Currently this simply reports the GFX1250Insts flag.
1504+
bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
1505+
1506+
// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
// Currently this simply reports the GFX1250Insts flag.
1507+
bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1508+
15031509
// \returns true if target has S_SETPRIO_INC_WG instruction.
15041510
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
15051511

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,25 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
353353
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
354354
}
355355

356+
// VOP3P profile built on VOP_V2I16_V2I16_V2I16_V2I16 with VOP3_PACKED and
// HasModifiers cleared. Despite its name it is shared by both the
// V_PK_ADD_{MIN|MAX}_* and the V_PK_{MIN|MAX}3_* pseudos defined below.
def PK_ADD_MINMAX_Profile : VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16, VOP3_PACKED> {
357+
let HasModifiers = 0;
358+
}
359+
360+
// All eight pseudos below are marked commutable and rematerializable.
let isCommutable = 1, isReMaterializable = 1 in {
361+
// Packed 16-bit add-then-min/max, signed (I16) and unsigned (U16) variants.
let SubtargetPredicate = HasPkAddMinMaxInsts in {
362+
defm V_PK_ADD_MAX_I16 : VOP3PInst<"v_pk_add_max_i16", PK_ADD_MINMAX_Profile>;
363+
defm V_PK_ADD_MAX_U16 : VOP3PInst<"v_pk_add_max_u16", PK_ADD_MINMAX_Profile>;
364+
defm V_PK_ADD_MIN_I16 : VOP3PInst<"v_pk_add_min_i16", PK_ADD_MINMAX_Profile>;
365+
defm V_PK_ADD_MIN_U16 : VOP3PInst<"v_pk_add_min_u16", PK_ADD_MINMAX_Profile>;
366+
} // End SubtargetPredicate = HasPkAddMinMaxInsts
367+
// Packed 16-bit three-operand min/max, signed (I16) and unsigned (U16) variants.
let SubtargetPredicate = HasPkMinMax3Insts in {
368+
defm V_PK_MAX3_I16 : VOP3PInst<"v_pk_max3_i16", PK_ADD_MINMAX_Profile>;
369+
defm V_PK_MAX3_U16 : VOP3PInst<"v_pk_max3_u16", PK_ADD_MINMAX_Profile>;
370+
defm V_PK_MIN3_I16 : VOP3PInst<"v_pk_min3_i16", PK_ADD_MINMAX_Profile>;
371+
defm V_PK_MIN3_U16 : VOP3PInst<"v_pk_min3_u16", PK_ADD_MINMAX_Profile>;
372+
} // End SubtargetPredicate = HasPkMinMax3Insts
373+
} // End isCommutable = 1, isReMaterializable = 1
374+
356375
// Defines patterns that extract signed 4bit from each Idx[0].
357376
foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
358377
def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
@@ -2157,6 +2176,8 @@ multiclass VOP3P_Realtriple_gfx11_gfx12<bits<8> op>
21572176

21582177
multiclass VOP3P_Real_gfx12<bits<8> op> : VOP3P_Real_Base<GFX12Gen, op>;
21592178

2179+
// Real-encoding multiclass for GFX1250: forwards the 8-bit opcode to
// VOP3P_Real_Base instantiated with GFX1250Gen.
multiclass VOP3P_Real_gfx1250<bits<8> op> : VOP3P_Real_Base<GFX1250Gen, op>;
2180+
21602181
multiclass VOP3P_Real_with_name_gfx12<bits<8> op,
21612182
string backing_ps_name = NAME,
21622183
string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> :
@@ -2165,6 +2186,15 @@ multiclass VOP3P_Real_with_name_gfx12<bits<8> op,
21652186
defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">;
21662187
defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">;
21672188

2189+
// GFX1250 real encodings for the packed 16-bit integer add-min/max and
// min3/max3 instructions; the argument is the 8-bit VOP3P opcode.
defm V_PK_ADD_MAX_I16 : VOP3P_Real_gfx1250<0x14>;
2190+
defm V_PK_ADD_MAX_U16 : VOP3P_Real_gfx1250<0x15>;
2191+
defm V_PK_ADD_MIN_I16 : VOP3P_Real_gfx1250<0x2d>;
2192+
defm V_PK_ADD_MIN_U16 : VOP3P_Real_gfx1250<0x2e>;
2193+
defm V_PK_MAX3_I16 : VOP3P_Real_gfx1250<0x2f>;
2194+
defm V_PK_MAX3_U16 : VOP3P_Real_gfx1250<0x30>;
2195+
defm V_PK_MIN3_I16 : VOP3P_Real_gfx1250<0x31>;
2196+
defm V_PK_MIN3_U16 : VOP3P_Real_gfx1250<0x32>;
2197+
21682198
defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
21692199
defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;
21702200

0 commit comments

Comments
 (0)