Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;

// Subtarget feature selected by the name "ieee-minimum-maximum-insts".
// The second and third arguments name the GCNSubtarget member
// (HasIEEEMinimumMaximumInsts) and the value ("true") associated with the
// feature; the last argument is the human-readable description.
def FeatureIEEEMinimumMaximumInsts : SubtargetFeature<"ieee-minimum-maximum-insts",
"HasIEEEMinimumMaximumInsts",
"true",
"Has v_minimum/maximum_f16/f32/f64, v_minimummaximum/maximumminimum_f16/f32 and v_pk_minimum/maximum_f16 instructions"
>;

def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
"HasMinimum3Maximum3F32",
"true",
Expand Down Expand Up @@ -1433,8 +1439,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]
>;

Expand Down Expand Up @@ -2188,6 +2194,10 @@ def isGFX12Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
AssemblerPredicate<(all_of FeatureGFX12Insts)>;

// Predicate guarding the IEEE minimum/maximum instructions and patterns.
// Code generation evaluates Subtarget->hasIEEEMinimumMaximumInsts(); the
// assembler requires FeatureIEEEMinimumMaximumInsts.
def HasIEEEMinimumMaximumInsts :
Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;

// Predicate guarding the f32 minimum3/maximum3 instructions and patterns.
// Code generation evaluates Subtarget->hasMinimum3Maximum3F32(); the
// assembler requires FeatureMinimum3Maximum3F32.
def HasMinimum3Maximum3F32 :
Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2095,7 +2095,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_SADDO, G_SSUBO})
.lower();

if (ST.hasIEEEMinMax()) {
if (ST.hasIEEEMinimumMaximumInsts()) {
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;
bool HasAshrPkInsts = false;
bool HasIEEEMinimumMaximumInsts = false;
bool HasMinimum3Maximum3F32 = false;
bool HasMinimum3Maximum3F16 = false;
bool HasMinimum3Maximum3PKF16 = false;
Expand Down Expand Up @@ -1447,10 +1448,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true when the subtarget generation is older than GFX12.
bool hasIEEEMode() const { return getGeneration() < GFX12; }

// \returns true if the target has IEEE fminimum/fmaximum instructions
bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }

// \returns true if the target has IEEE fminimum3/fmaximum3 instructions
bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
// \returns the HasIEEEMinimumMaximumInsts subtarget flag (declared with a
// default of false), rather than deriving the answer from the generation.
bool hasIEEEMinimumMaximumInsts() const { return HasIEEEMinimumMaximumInsts; }

// \returns true if the target has WG_RR_MODE kernel descriptor mode bit.
// Reported for every generation from GFX12 onward.
bool hasRrWGMode() const { return getGeneration() >= GFX12; }
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

if (Subtarget->hasIEEEMinMax()) {
if (Subtarget->hasIEEEMinimumMaximumInsts()) {
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
} else {
Expand Down Expand Up @@ -7023,7 +7023,8 @@ SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
if (VT.isVector())
return splitBinaryVectorOp(Op, DAG);

assert(!Subtarget->hasIEEEMinMax() && !Subtarget->hasMinimum3Maximum3F16() &&
assert(!Subtarget->hasIEEEMinimumMaximumInsts() &&
!Subtarget->hasMinimum3Maximum3F16() &&
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
"should not need to widen f16 minimum/maximum to v2f16");

Expand Down Expand Up @@ -13907,7 +13908,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// operand form.
const SDNodeFlags Flags = N->getFlags();
if ((Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM) &&
!Subtarget->hasIEEEMinMax() && Flags.hasNoNaNs()) {
!Subtarget->hasIEEEMinimumMaximumInsts() && Flags.hasNoNaNs()) {
unsigned NewOpc =
Opc == ISD::FMINIMUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
return DAG.getNode(NewOpc, SDLoc(N), VT, Op0, Op1, Flags);
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3924,21 +3924,21 @@ let True16Predicate = UseFakeTrue16Insts in {
}
} // End SubtargetPredicate = [isGFX9Plus]

let SubtargetPredicate = isGFX12Plus in {
let SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
}

let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = isGFX12Plus in {
let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
}

let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = isGFX12Plus in {
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul]

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
Expand All @@ -177,7 +177,7 @@ let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
} // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1

} // End isReMaterializable = 1

Expand Down Expand Up @@ -1501,12 +1501,12 @@ let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
def : Cvt_Scale_Sr_F32ToBF16F16_Pat<int_amdgcn_cvt_sr_f16_f32, V_CVT_SR_F16_F32_e64, v2f16>;
}

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0

let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
defm V_DOT2_F16_F16 : VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I1
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fmaximum>;
defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fminimum>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0
}

defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
Expand Down
Loading