Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 100 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;

def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The feature string doesn't follow the naming convention of the existing features. For example, these could simply be `cube-insts`, `lerp-insts`, etc.

"HasCubeInsts",
"true",
"Has V_CUBE* instructions"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Has V_CUBE* instructions"
"Has v_cube* instructions"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and all the new features

>;

def FeatureLerpInst : SubtargetFeature<"V_LERP-insts",
"HasLerpInst",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The builtin for this is actually broken and missing a feature check

"true",
"Has v_lerp_u8 instruction"
>;

// Subtarget has the v_sad_u8, v_sad_hi_u8, v_sad_u16 and v_sad_u32
// instructions. Sets GCNSubtarget::HasSadInsts and backs the HasSadInsts
// predicate.
// NOTE(review): the feature string is left unchanged here because it is the
// externally visible spelling; it does not match the lowercase dash-separated
// style of neighbouring features such as "pk-fmac-f16-inst".
def FeatureSadInsts : SubtargetFeature<"V_SAD-insts",
  "HasSadInsts",
  "true",
  "Has v_sad* instructions"
>;

def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Builtin doesn't have a feature check

"HasQsadInsts",
"true",
"Has V_QSAD* instructions"
>;

// Subtarget has the v_cvt_norm_i16_f16 and v_cvt_norm_u16_f16 instructions.
// Sets GCNSubtarget::HasCvtNormInsts and backs the HasCvtNormInsts predicate.
// NOTE(review): the feature string is left unchanged here because it is the
// externally visible spelling; it does not match the lowercase dash-separated
// style of neighbouring features such as "pk-fmac-f16-inst".
def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
  "HasCvtNormInsts",
  "true",
  "Has v_cvt_norm* instructions"
>;

// Subtarget has the f32-source packed-norm conversions defined in
// VOP2Instructions.td (v_cvt_pknorm_i16_f32, v_cvt_pknorm_u16_f32).
// Sets GCNSubtarget::HasCvtPkNormVOP2Insts and backs the
// HasCvtPkNormVOP2Insts predicate. The f16-source forms are covered by the
// separate FeatureCvtPkNormVOP3Insts, so they are not mentioned here.
// NOTE(review): the feature string is left unchanged here because it is the
// externally visible spelling; it does not match the lowercase dash-separated
// style of neighbouring features such as "pk-fmac-f16-inst".
def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
  "HasCvtPkNormVOP2Insts",
  "true",
  "Has v_cvt_pknorm_*_f32 instructions"
>;

// Subtarget has the f16-source packed-norm conversions defined in
// VOP3Instructions.td (v_cvt_pknorm_i16_f16, v_cvt_pknorm_u16_f16).
// Sets GCNSubtarget::HasCvtPkNormVOP3Insts and backs the
// HasCvtPkNormVOP3Insts predicate. The f32-source forms are covered by the
// separate FeatureCvtPkNormVOP2Insts, so they are not mentioned here.
// NOTE(review): the feature string is left unchanged here because it is the
// externally visible spelling; it does not match the lowercase dash-separated
// style of neighbouring features such as "pk-fmac-f16-inst".
def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
  "HasCvtPkNormVOP3Insts",
  "true",
  "Has v_cvt_pknorm_*_f16 instructions"
>;

def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
"HasAtomicDsPkAdd16Insts",
"true",
Expand Down Expand Up @@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
FeatureSadInsts, FeatureCvtPkNormVOP2Insts
]
>;

Expand All @@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts
]
>;

Expand All @@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts,
FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
FeatureCvtPkNormVOP2Insts
]
>;

Expand All @@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
FeatureCvtPkNormVOP3Insts
]
>;

Expand All @@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureCubeInsts,
FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
FeatureCvtPkNormVOP3Insts
]
>;

Expand All @@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts,
FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts
]
>;

Expand Down Expand Up @@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
FeatureD16Writes32BitVgpr
FeatureD16Writes32BitVgpr,
FeatureCubeInsts,
FeatureLerpInst,
FeatureSadInsts,
FeatureQsadInsts,
FeatureCvtNormInsts,
FeatureCvtPkNormVOP2Insts,
FeatureCvtPkNormVOP3Insts
]>;

def FeatureISAVersion12_50 : FeatureSet<
def FeatureISAVersion12_50_Common : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureRequiresAlignedVGPRs,
Expand Down Expand Up @@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureD16Writes32BitVgpr,
]>;

// Feature set for ISA version 12.50: every feature listed in
// FeatureISAVersion12_50_Common, followed by the per-instruction-family
// features below (cube, lerp, sad, qsad, cvt_norm and both cvt_pknorm groups).
// FeatureISAVersion12_51 is built from this set's .Features, so anything added
// here is inherited by it as well.
def FeatureISAVersion12_50 : FeatureSet<
!listconcat(FeatureISAVersion12_50_Common.Features,
[FeatureCubeInsts,
FeatureLerpInst,
FeatureSadInsts,
FeatureQsadInsts,
FeatureCvtNormInsts,
FeatureCvtPkNormVOP2Insts,
FeatureCvtPkNormVOP3Insts])>;

def FeatureISAVersion12_51 : FeatureSet<
!listconcat(FeatureISAVersion12_50.Features,
[FeatureDPALU_DPP])>;
Expand Down Expand Up @@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;

// Predicates for the per-instruction-family subtarget features. Each one pairs
// a C++ condition string (a call to the matching GCNSubtarget getter) with an
// AssemblerPredicate that requires the corresponding SubtargetFeature.

// Guards v_cubeid_f32, v_cubesc_f32, v_cubetc_f32 and v_cubema_f32.
def HasCubeInsts : Predicate<"Subtarget->hasCubeInsts()">,
AssemblerPredicate<(all_of FeatureCubeInsts)>;

// Guards v_lerp_u8.
def HasLerpInst : Predicate<"Subtarget->hasLerpInst()">,
AssemblerPredicate<(all_of FeatureLerpInst)>;

// Guards v_sad_u8, v_sad_hi_u8, v_sad_u16 and v_sad_u32.
def HasSadInsts : Predicate<"Subtarget->hasSadInsts()">,
AssemblerPredicate<(all_of FeatureSadInsts)>;

// Guards v_qsad_pk_u16_u8.
def HasQsadInsts : Predicate<"Subtarget->hasQsadInsts()">,
AssemblerPredicate<(all_of FeatureQsadInsts)>;

// Guards v_cvt_norm_i16_f16 and v_cvt_norm_u16_f16.
def HasCvtNormInsts : Predicate<"Subtarget->hasCvtNormInsts()">,
AssemblerPredicate<(all_of FeatureCvtNormInsts)>;

// Guards v_cvt_pknorm_i16_f32 and v_cvt_pknorm_u16_f32 (VOP2Instructions.td).
def HasCvtPkNormVOP2Insts : Predicate<"Subtarget->hasCvtPkNormVOP2Insts()">,
AssemblerPredicate<(all_of FeatureCvtPkNormVOP2Insts)>;

// Guards v_cvt_pknorm_i16_f16 and v_cvt_pknorm_u16_f16 (VOP3Instructions.td).
def HasCvtPkNormVOP3Insts : Predicate<"Subtarget->hasCvtPkNormVOP3Insts()">,
AssemblerPredicate<(all_of FeatureCvtPkNormVOP3Insts)>;

def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;

Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
// Per-instruction-family availability flags. Each defaults to false and is the
// field named by the same-named SubtargetFeature in AMDGPU.td
// (FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
// FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts).
bool HasCubeInsts = false;
bool HasLerpInst = false;
bool HasSadInsts = false;
bool HasQsadInsts = false;
bool HasCvtNormInsts = false;
bool HasCvtPkNormVOP2Insts = false;
bool HasCvtPkNormVOP3Insts = false;
bool HasFP8E5M3Insts = false;
bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
Expand Down Expand Up @@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }

// Accessors for the per-instruction-family feature flags. These are the
// functions called by the same-named Has* predicates in AMDGPU.td.

/// \returns true if the subtarget has the v_cube* instructions.
bool hasCubeInsts() const { return HasCubeInsts; }

/// \returns true if the subtarget has the v_lerp_u8 instruction.
bool hasLerpInst() const { return HasLerpInst; }

/// \returns true if the subtarget has the v_sad* instructions.
bool hasSadInsts() const { return HasSadInsts; }

/// \returns true if the subtarget has the v_qsad* instructions.
bool hasQsadInsts() const { return HasQsadInsts; }

/// \returns true if the subtarget has the v_cvt_norm* instructions.
bool hasCvtNormInsts() const { return HasCvtNormInsts; }

/// \returns true if the subtarget has the v_cvt_pknorm_*_f32 instructions.
bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; }

/// \returns true if the subtarget has the v_cvt_pknorm_*_f16 instructions.
bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; }

bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }

bool hasPkFmacF16Inst() const {
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in {

let isReMaterializable = 1 in
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;

let mayRaiseFPException = 0 in {
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
} // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts in {
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
} // End mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts

let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasCvtPkNormVOP2Insts in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}
Expand Down
22 changes: 13 additions & 9 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SubtargetPredicate = HasLerpInst in
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;

let SchedRW = [WriteIntMul] in {
let SubtargetPredicate = HasMadU32Inst in
Expand Down Expand Up @@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
} // End isCommutable = 1

let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in {
let mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
} // End mayRaiseFPException
} // mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts

defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
Expand Down Expand Up @@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0

let isCommutable = 1 in {
let isCommutable = 1, SubtargetPredicate = HasSadInsts in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
} // End isCommutable = 1
} // End isCommutable = 1, SubtargetPredicate = HasSadInsts
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;

defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
Expand Down Expand Up @@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {

let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
let SubtargetPredicate = HasQsadInsts in
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus
Expand Down Expand Up @@ -789,9 +791,6 @@ let isCommutable = 1 in {
defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
} // End isCommutable = 1

defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;

defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;

let isReMaterializable = 1 in {
Expand Down Expand Up @@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),

} // End SubtargetPredicate = isGFX9Plus

// v_cvt_pknorm_{i16,u16}_f16 are only available on subtargets that satisfy
// HasCvtPkNormVOP3Insts (i.e. that list FeatureCvtPkNormVOP3Insts).
let SubtargetPredicate = HasCvtPkNormVOP3Insts in {
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
} // End SubtargetPredicate = HasCvtPkNormVOP3Insts

// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s

declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0
Expand Down
Loading