Skip to content

Commit 068a19b

Browse files
committed
Rename to IEEEMinimumMaximumInsts, turn into a proper subtarget feature
1 parent 5ec1940 commit 068a19b

File tree

7 files changed

+25
-14
lines changed

7 files changed

+25
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
143143
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
144144
>;
145145

146+
def FeatureIEEEMinimumMaximumInsts : SubtargetFeature<"ieee-minimum-maximum-insts",
147+
"HasIEEEMinimumMaximumInsts",
148+
"true",
149+
"Has v_minimum/maximum_f16/f32/f64 and v_pk_minimum/maximum_f16 instructions"
150+
>;
151+
146152
def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
147153
"HasMinimum3Maximum3F32",
148154
"true",
@@ -1433,8 +1439,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
14331439
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
14341440
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
14351441
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
1436-
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
1437-
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
1442+
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
1443+
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
14381444
]
14391445
>;
14401446

@@ -2188,6 +2194,10 @@ def isGFX12Plus :
21882194
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
21892195
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
21902196

2197+
def HasIEEEMinimumMaximumInsts :
2198+
Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
2199+
AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
2200+
21912201
def HasMinimum3Maximum3F32 :
21922202
Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
21932203
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2095,7 +2095,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
20952095
G_SADDO, G_SSUBO})
20962096
.lower();
20972097

2098-
if (ST.hasIEEEMinMaxInsts()) {
2098+
if (ST.hasIEEEMinimumMaximumInsts()) {
20992099
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
21002100
.legalFor(FPTypesPK16)
21012101
.clampMaxNumElements(0, S16, 2)

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
255255
bool HasRequiredExportPriority = false;
256256
bool HasVmemWriteVgprInOrder = false;
257257
bool HasAshrPkInsts = false;
258+
bool HasIEEEMinimumMaximumInsts = false;
258259
bool HasMinimum3Maximum3F32 = false;
259260
bool HasMinimum3Maximum3F16 = false;
260261
bool HasMinimum3Maximum3PKF16 = false;
@@ -1447,7 +1448,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
14471448
bool hasIEEEMode() const { return getGeneration() < GFX12; }
14481449

14491450
// \returns true if the target has IEEE fminimum/fmaximum instructions
1450-
bool hasIEEEMinMaxInsts() const { return getGeneration() >= GFX12; }
1451+
bool hasIEEEMinimumMaximumInsts() const { return HasIEEEMinimumMaximumInsts; }
14511452

14521453
// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
14531454
bool hasRrWGMode() const { return getGeneration() >= GFX12; }

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
877877
if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
878878
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
879879

880-
if (Subtarget->hasIEEEMinMaxInsts()) {
880+
if (Subtarget->hasIEEEMinimumMaximumInsts()) {
881881
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
882882
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
883883
} else {
@@ -7023,7 +7023,7 @@ SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
70237023
if (VT.isVector())
70247024
return splitBinaryVectorOp(Op, DAG);
70257025

7026-
assert(!Subtarget->hasIEEEMinMaxInsts() &&
7026+
assert(!Subtarget->hasIEEEMinimumMaximumInsts() &&
70277027
!Subtarget->hasMinimum3Maximum3F16() &&
70287028
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
70297029
"should not need to widen f16 minimum/maximum to v2f16");

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3924,21 +3924,21 @@ let True16Predicate = UseFakeTrue16Insts in {
39243924
}
39253925
} // End SubtargetPredicate = [isGFX9Plus]
39263926

3927-
let SubtargetPredicate = isGFX12Plus in {
3927+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
39283928
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39293929
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39303930
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39313931
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39323932
}
39333933

3934-
let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = isGFX12Plus in {
3934+
let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
39353935
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39363936
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39373937
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39383938
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39393939
}
39403940

3941-
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = isGFX12Plus in {
3941+
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
39423942
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
39433943
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
39443944
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
167167
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
168168
} // End SchedRW = [WriteIntMul]
169169

170-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
170+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1 in {
171171
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
172172
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
173173
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
@@ -177,7 +177,7 @@ let SchedRW = [WriteDoubleAdd] in {
177177
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
178178
defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
179179
} // End SchedRW = [WriteDoubleAdd]
180-
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1
180+
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1
181181

182182
} // End isReMaterializable = 1
183183

@@ -1501,7 +1501,7 @@ let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
15011501
def : Cvt_Scale_Sr_F32ToBF16F16_Pat<int_amdgcn_cvt_sr_f16_f32, V_CVT_SR_F16_F32_e64, v2f16>;
15021502
}
15031503

1504-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
1504+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
15051505
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
15061506
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
15071507
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I1
115115
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
116116
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
117117

118-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
118+
let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
119119
defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fmaximum>;
120120
defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fminimum>;
121-
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
121+
} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0
122122
}
123123

124124
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;

0 commit comments

Comments
 (0)