Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 100 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;

def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't follow feature naming scheme, no caps

"HasVCUBEInsts",
"true",
"Has V_CUBE* instructions"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Has V_CUBE* instructions"
"Has v_cube* instructions"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and all the new features

>;

def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
"HasVLERPInsts",
"true",
"Has V_LERP* instructions"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only one instruction.

>;

// Subtarget feature "V_SAD-insts", backed by the GCNSubtarget field
// HasVSADInsts. The HasVSADInsts predicate built on this feature guards the
// v_sad_u8 / v_sad_hi_u8 / v_sad_u16 / v_sad_u32 VOP3 definitions.
def FeatureVSADInsts
    : SubtargetFeature<"V_SAD-insts", "HasVSADInsts", "true",
                       "Has V_SAD* instructions">;

// Subtarget feature "V_QSAD-insts", backed by the GCNSubtarget field
// HasVQSADInsts. The HasVQSADInsts predicate built on this feature guards
// v_qsad_pk_u16_u8. Unlike FeatureVSADInsts, it is not part of the
// SOUTHERN_ISLANDS generation feature list.
def FeatureVQSADInsts
    : SubtargetFeature<"V_QSAD-insts", "HasVQSADInsts", "true",
                       "Has V_QSAD* instructions">;

// Subtarget feature "V_CVT_NORM-insts", backed by the GCNSubtarget field
// HasVCVTNORMInsts. The HasVCVTNORMInsts predicate built on this feature
// guards the VOP1 v_cvt_norm_i16_f16 / v_cvt_norm_u16_f16 definitions.
def FeatureVCVTNORMInsts
    : SubtargetFeature<"V_CVT_NORM-insts", "HasVCVTNORMInsts", "true",
                       "Has V_CVT_NORM* instructions">;

def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
"HasVCVTPKNORMVOP2Insts",
"true",
"Has V_CVT_NORM* VOP2 instructions"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a weird name/description because v_cvt_pk_norm_i16_f32/v_cvt_pk_norm_u16_f32 are only VOP2 on GFX6-7, they are VOP3 only on newer hardware.

Maybe something like Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions would be better.

>;

// Subtarget feature "V_CVT_PKNORM-VOP3-insts", backed by the GCNSubtarget field
// HasVCVTPKNORMVOP3Insts. The HasVCVTPKNORMVOP3Insts predicate built on this
// feature guards v_cvt_pknorm_i16_f16 / v_cvt_pknorm_u16_f16.
// The description names the PKNORM family explicitly: the unpacked
// V_CVT_NORM* conversions are a different group covered by
// FeatureVCVTNORMInsts.
def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
  "HasVCVTPKNORMVOP3Insts",
  "true",
  "Has V_CVT_PKNORM* VOP3 instructions"
>;

def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
"HasAtomicDsPkAdd16Insts",
"true",
Expand Down Expand Up @@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
]
>;

Expand All @@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
]
>;

Expand All @@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts,
FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
FeatureVCVTPKNORMVOP2Insts
]
>;

Expand All @@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
FeatureVCVTPKNORMVOP3Insts
]
>;

Expand All @@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts,
FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
FeatureVCVTPKNORMVOP3Insts
]
>;

Expand All @@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts,
FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
]
>;

Expand Down Expand Up @@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
FeatureD16Writes32BitVgpr
FeatureD16Writes32BitVgpr,
FeatureVCUBEInsts,
FeatureVLERPInsts,
FeatureVSADInsts,
FeatureVQSADInsts,
FeatureVCVTNORMInsts,
FeatureVCVTPKNORMVOP2Insts,
FeatureVCVTPKNORMVOP3Insts
]>;

def FeatureISAVersion12_50 : FeatureSet<
def FeatureISAVersion12_50_Common : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureRequiresAlignedVGPRs,
Expand Down Expand Up @@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureD16Writes32BitVgpr,
]>;

// ISA version 12_50 feature set: the shared FeatureISAVersion12_50_Common list
// followed by the per-instruction-group features appended below.
def FeatureISAVersion12_50
    : FeatureSet<!listconcat(FeatureISAVersion12_50_Common.Features,
                             [FeatureVCUBEInsts, FeatureVLERPInsts,
                              FeatureVSADInsts, FeatureVQSADInsts,
                              FeatureVCVTNORMInsts,
                              FeatureVCVTPKNORMVOP2Insts,
                              FeatureVCVTPKNORMVOP3Insts])>;

// ISA version 12_51 feature set: the complete FeatureISAVersion12_50 list with
// FeatureDPALU_DPP appended.
def FeatureISAVersion12_51
    : FeatureSet<!listconcat(FeatureISAVersion12_50.Features,
                             [FeatureDPALU_DPP])>;
Expand Down Expand Up @@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
// Each predicate below pairs a C++ condition string (a GCNSubtarget accessor
// call) with an AssemblerPredicate requiring the matching subtarget feature.

def HasFP8ConversionInsts
    : Predicate<"Subtarget->hasFP8ConversionInsts()">,
      AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;

// v_cube* instructions.
def HasVCUBEInsts
    : Predicate<"Subtarget->hasVCUBEInsts()">,
      AssemblerPredicate<(all_of FeatureVCUBEInsts)>;

// v_lerp_u8.
def HasVLERPInsts
    : Predicate<"Subtarget->hasVLERPInsts()">,
      AssemblerPredicate<(all_of FeatureVLERPInsts)>;

// v_sad_* instructions.
def HasVSADInsts
    : Predicate<"Subtarget->hasVSADInsts()">,
      AssemblerPredicate<(all_of FeatureVSADInsts)>;

// v_qsad_pk_u16_u8.
def HasVQSADInsts
    : Predicate<"Subtarget->hasVQSADInsts()">,
      AssemblerPredicate<(all_of FeatureVQSADInsts)>;

// v_cvt_norm_{i16,u16}_f16.
def HasVCVTNORMInsts
    : Predicate<"Subtarget->hasVCVTNORMInsts()">,
      AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;

// v_cvt_pknorm_{i16,u16}_f32.
def HasVCVTPKNORMVOP2Insts
    : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
      AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;

// v_cvt_pknorm_{i16,u16}_f16.
def HasVCVTPKNORMVOP3Insts
    : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
      AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;

def HasFP8E5M3Insts
    : Predicate<"Subtarget->hasFP8E5M3Insts()">,
      AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;


Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
// Per-instruction-group availability flags, all false by default. Each name
// matches the field string given to the corresponding SubtargetFeature in
// AMDGPU.td (e.g. FeatureVCUBEInsts names "HasVCUBEInsts" with value "true").
bool HasVCUBEInsts = false;          // v_cube* instructions
bool HasVLERPInsts = false;          // v_lerp_u8
bool HasVSADInsts = false;           // v_sad_* instructions
bool HasVQSADInsts = false;          // v_qsad_pk_u16_u8
bool HasVCVTNORMInsts = false;       // v_cvt_norm_{i16,u16}_f16
bool HasVCVTPKNORMVOP2Insts = false; // v_cvt_pknorm_{i16,u16}_f32
bool HasVCVTPKNORMVOP3Insts = false; // v_cvt_pknorm_{i16,u16}_f16
bool HasFP8E5M3Insts = false;
bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
Expand Down Expand Up @@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }

// Read-only accessors for the per-instruction-group flags. The TableGen
// predicates of the same names (HasVCUBEInsts, HasVLERPInsts, ...) call these
// through "Subtarget->has...()".
bool hasVCUBEInsts() const {
  return HasVCUBEInsts;
}

bool hasVLERPInsts() const {
  return HasVLERPInsts;
}

bool hasVSADInsts() const {
  return HasVSADInsts;
}

bool hasVQSADInsts() const {
  return HasVQSADInsts;
}

bool hasVCVTNORMInsts() const {
  return HasVCVTNORMInsts;
}

bool hasVCVTPKNORMVOP2Insts() const {
  return HasVCVTPKNORMVOP2Insts;
}

bool hasVCVTPKNORMVOP3Insts() const {
  return HasVCVTPKNORMVOP3Insts;
}

bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }

bool hasPkFmacF16Inst() const {
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in {

let isReMaterializable = 1 in
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;

let mayRaiseFPException = 0 in {
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
} // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

// f16 -> normalized 16-bit integer conversions. Guarded by the dedicated
// HasVCVTNORMInsts predicate rather than a generation predicate.
let SubtargetPredicate = HasVCVTNORMInsts, mayRaiseFPException = 0 in {
  defm V_CVT_NORM_I16_F16
      : VOP1Inst_t16_with_profiles<"v_cvt_norm_i16_f16",
                                   VOP_I16_F16_SPECIAL_OMOD,
                                   VOP_I16_F16_SPECIAL_OMOD_t16,
                                   VOP_I16_F16_SPECIAL_OMOD_fake16>;
  defm V_CVT_NORM_U16_F16
      : VOP1Inst_t16_with_profiles<"v_cvt_norm_u16_f16",
                                   VOP_I16_F16_SPECIAL_OMOD,
                                   VOP_I16_F16_SPECIAL_OMOD_t16,
                                   VOP_I16_F16_SPECIAL_OMOD_fake16>;
} // End SubtargetPredicate = HasVCVTNORMInsts, mayRaiseFPException = 0

// v_screen_partition_4se_b32 is defined under the isGFX9Only predicate.
let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32
      : VOP1Inst<"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
// Packed f32 -> normalized 16-bit conversions, guarded by
// HasVCVTPKNORMVOP2Insts. Marked as not reading the mode register and not
// raising FP exceptions.
let SubtargetPredicate = HasVCVTPKNORMVOP2Insts, ReadsModeReg = 0,
    mayRaiseFPException = 0 in {
  defm V_CVT_PKNORM_I16_F32
      : VOP2Inst<"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32,
                 AMDGPUpknorm_i16_f32>;
  defm V_CVT_PKNORM_U16_F32
      : VOP2Inst<"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32,
                 AMDGPUpknorm_u16_f32>;
} // End SubtargetPredicate = HasVCVTPKNORMVOP2Insts, ReadsModeReg = 0, mayRaiseFPException = 0
Expand Down
22 changes: 13 additions & 9 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
// v_lerp_u8 is selected from int_amdgcn_lerp and guarded by HasVLERPInsts.
let SubtargetPredicate = HasVLERPInsts in {
  defm V_LERP_U8
      : VOP3Inst<"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>,
                 int_amdgcn_lerp>;
} // End SubtargetPredicate = HasVLERPInsts

let SchedRW = [WriteIntMul] in {
let SubtargetPredicate = HasMadU32Inst in
Expand Down Expand Up @@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
} // End isCommutable = 1

let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in {
let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
} // End mayRaiseFPException
} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts

defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
Expand Down Expand Up @@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0

let isCommutable = 1 in {
let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
} // End isCommutable = 1
} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;

defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
Expand Down Expand Up @@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {

let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
let SubtargetPredicate = HasVQSADInsts in
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus
Expand Down Expand Up @@ -789,9 +791,6 @@ let isCommutable = 1 in {
defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
} // End isCommutable = 1

defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;

defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;

let isReMaterializable = 1 in {
Expand Down Expand Up @@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),

} // End SubtargetPredicate = isGFX9Plus

// Packed f16 -> normalized 16-bit conversions, guarded by
// HasVCVTPKNORMVOP3Insts.
let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
  defm V_CVT_PKNORM_I16_F16
      : VOP3Inst_t16<"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
  defm V_CVT_PKNORM_U16_F16
      : VOP3Inst_t16<"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
} // End SubtargetPredicate = HasVCVTPKNORMVOP3Insts

// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s

declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0
Expand Down
Loading