Skip to content

Commit bc1f85d

Browse files
changpeng and rampitec authored
AMDGPU: Support packed bf16 instructions on gfx1250 (#150283)
Co-authored-by: Stanislav Mekhanoshin <[email protected]>
1 parent 17e32c9 commit bc1f85d

File tree

8 files changed

+920
-0
lines changed

8 files changed

+920
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,12 @@ def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts",
559559
"Has bf16 conversion instructions"
560560
>;
561561

562+
// Subtarget feature "bf16-pk-insts": sets the HasBF16PackedInsts subtarget
// field to "true". Per its description string it covers the packed bf16
// fma, add, mul, max and min instructions.
def FeatureBF16PackedInsts : SubtargetFeature<"bf16-pk-insts",
563+
"HasBF16PackedInsts",
564+
"true",
565+
"Has bf16 packed instructions (fma, add, mul, max, min)"
566+
>;
567+
562568
def FeatureVOP3P : SubtargetFeature<"vop3p",
563569
"HasVOP3PInsts",
564570
"true",
@@ -1989,6 +1995,7 @@ def FeatureISAVersion12_50 : FeatureSet<
19891995
FeatureTransposeLoadF4F6Insts,
19901996
FeatureBF16TransInsts,
19911997
FeatureBF16ConversionInsts,
1998+
FeatureBF16PackedInsts,
19921999
FeatureCvtPkF16F32Inst,
19932000
FeatureMinimum3Maximum3PKF16,
19942001
FeaturePrngInst,
@@ -2472,6 +2479,9 @@ def HasBF16TransInsts : Predicate<"Subtarget->hasBF16TransInsts()">,
24722479
def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
24732480
AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;
24742481

2482+
// Predicate for the packed bf16 instructions: the condition string queries
// Subtarget->hasBF16PackedInsts(), and the assembler-side predicate requires
// FeatureBF16PackedInsts.
def HasBF16PackedInsts : Predicate<"Subtarget->hasBF16PackedInsts()">,
2483+
AssemblerPredicate<(all_of FeatureBF16PackedInsts)>;
2484+
24752485
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
24762486
AssemblerPredicate<(all_of FeatureVOP3P)>;
24772487

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class AMDGPUSubtarget {
6161
bool EnableRealTrue16Insts = false;
6262
bool HasBF16TransInsts = false;
6363
bool HasBF16ConversionInsts = false;
64+
bool HasBF16PackedInsts = false;
6465
bool HasMadMixInsts = false;
6566
bool HasMadMacF32Insts = false;
6667
bool HasDsSrc2Insts = false;
@@ -209,6 +210,8 @@ class AMDGPUSubtarget {
209210
return HasBF16ConversionInsts;
210211
}
211212

213+
/// Returns the value of the HasBF16PackedInsts subtarget flag.
bool hasBF16PackedInsts() const {
  return HasBF16PackedInsts;
}
214+
212215
bool hasMadMixInsts() const {
213216
return HasMadMixInsts;
214217
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
944944
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
945945
}
946946

947+
// When the subtarget reports packed bf16 instructions, mark FADD, FMUL,
// FMINNUM, FMAXNUM and FMA on v2bf16 as Legal.
if (Subtarget->hasBF16PackedInsts()) {
948+
setOperationAction(
949+
{ISD::FADD, ISD::FMUL, ISD::FMINNUM, ISD::FMAXNUM, ISD::FMA},
950+
MVT::v2bf16, Legal);
951+
}
952+
947953
if (Subtarget->hasBF16TransInsts()) {
948954
setOperationAction({ISD::FEXP2, ISD::FLOG2, ISD::FSQRT}, MVT::bf16, Legal);
949955
}

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2873,10 +2873,12 @@ def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
28732873

28742874
def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
28752875
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
2876+
// v2bf16 analogue of VOP_V2F16_V2F16_V2F16 / VOP_V2I16_V2I16_V2I16: three
// v2bf16 entries followed by untyped. NOTE(review): presumably the list is
// [result, src0, src1, src2] -- confirm against the VOPProfile definition.
def VOP_V2BF16_V2BF16_V2BF16 : VOPProfile <[v2bf16, v2bf16, v2bf16, untyped]>;
28762877
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
28772878

28782879
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
28792880
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2881+
// v2bf16 analogue of the four-entry v2f16 / v2i16 profiles: all four type
// slots are v2bf16. Used by V_PK_FMA_BF16 in VOP3PInstructions.td.
def VOP_V2BF16_V2BF16_V2BF16_V2BF16 : VOPProfile <[v2bf16, v2bf16, v2bf16, v2bf16]>;
28802882
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
28812883
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
28822884

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,6 +1196,14 @@ let isCommutable = 1, isReMaterializable = 1 in {
11961196

11971197
let SubtargetPredicate = HasPkMovB32, isAsCheapAsAMove = 1 in
11981198
defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
1199+
1200+
// Packed bf16 arithmetic: add, mul, min_num, max_num and fma, all guarded by
// HasBF16PackedInsts. The two-source ops use VOP_V2BF16_V2BF16_V2BF16 and the
// fma uses the four-entry VOP_V2BF16_V2BF16_V2BF16_V2BF16 profile, each wrapped
// in VOP3P_Profile<..., VOP3_PACKED>. This group sits inside the enclosing
// `isCommutable = 1, isReMaterializable = 1` let-block.
// NOTE(review): the trailing argument (any_fadd, any_fmul, fminnum_like,
// fmaxnum_like, any_fma) is presumably the selection pattern operator --
// confirm against the VOP3PInst multiclass.
let SubtargetPredicate = HasBF16PackedInsts in {
1201+
defm V_PK_ADD_BF16 : VOP3PInst<"v_pk_add_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fadd>;
1202+
defm V_PK_MUL_BF16 : VOP3PInst<"v_pk_mul_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fmul>;
1203+
defm V_PK_MIN_NUM_BF16 : VOP3PInst<"v_pk_min_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fminnum_like>;
1204+
defm V_PK_MAX_NUM_BF16 : VOP3PInst<"v_pk_max_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fmaxnum_like>;
1205+
defm V_PK_FMA_BF16 : VOP3PInst<"v_pk_fma_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fma>;
1206+
}
11991207
} // End isCommutable = 1, isReMaterializable = 1
12001208

12011209
def : AMDGPUMnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
@@ -2222,6 +2230,11 @@ defm V_PK_MAX3_I16 : VOP3P_Real_gfx1250<0x2f>;
22222230
defm V_PK_MAX3_U16 : VOP3P_Real_gfx1250<0x30>;
22232231
defm V_PK_MIN3_I16 : VOP3P_Real_gfx1250<0x31>;
22242232
defm V_PK_MIN3_U16 : VOP3P_Real_gfx1250<0x32>;
2233+
// gfx1250 real-instruction definitions for the five packed bf16 ops.
// NOTE(review): the hex argument is presumably the VOP3P opcode -- confirm
// against the VOP3P_Real_gfx1250 multiclass and the gfx1250 ISA document.
defm V_PK_FMA_BF16 : VOP3P_Real_gfx1250<0x11>;
2234+
defm V_PK_ADD_BF16 : VOP3P_Real_gfx1250<0x23>;
2235+
defm V_PK_MUL_BF16 : VOP3P_Real_gfx1250<0x2a>;
2236+
defm V_PK_MIN_NUM_BF16 : VOP3P_Real_gfx1250<0x2b>;
2237+
defm V_PK_MAX_NUM_BF16 : VOP3P_Real_gfx1250<0x2c>;
22252238

22262239
defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
22272240
defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;

0 commit comments

Comments
 (0)