Skip to content

Commit 1e99070

Browse files
author
Jagtap, Pravin
authored
AMDGPU: Widen f16 minimum/maximum to v2f16 and form v2f16 minimum3/maximum3 on gfx950 (llvm#762)
2 parents cd9dcb9 + 9bd2174 commit 1e99070

File tree

7 files changed

+920
-624
lines changed

7 files changed

+920
-624
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -860,8 +860,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
860860
if (Subtarget->hasMinimum3Maximum3F32())
861861
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
862862

863-
if (Subtarget->hasMinimum3Maximum3PKF16())
863+
if (Subtarget->hasMinimum3Maximum3PKF16()) {
864864
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::v2f16, Legal);
865+
866+
// If only the vector form is available, we need to widen to a vector.
867+
if (!Subtarget->hasMinimum3Maximum3F16())
868+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
869+
}
865870
}
866871

867872
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
@@ -5842,6 +5847,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
58425847
case ISD::FMINNUM:
58435848
case ISD::FMAXNUM:
58445849
return lowerFMINNUM_FMAXNUM(Op, DAG);
5850+
case ISD::FMINIMUM:
5851+
case ISD::FMAXIMUM:
5852+
return lowerFMINIMUM_FMAXIMUM(Op, DAG);
58455853
case ISD::FLDEXP:
58465854
case ISD::STRICT_FLDEXP:
58475855
return lowerFLDEXP(Op, DAG);
@@ -5863,8 +5871,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
58635871
case ISD::FMUL:
58645872
case ISD::FMINNUM_IEEE:
58655873
case ISD::FMAXNUM_IEEE:
5866-
case ISD::FMINIMUM:
5867-
case ISD::FMAXIMUM:
58685874
case ISD::UADDSAT:
58695875
case ISD::USUBSAT:
58705876
case ISD::SADDSAT:
@@ -6718,6 +6724,34 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
67186724
return Op;
67196725
}
67206726

6727+
/// Custom lowering for ISD::FMINIMUM / ISD::FMAXIMUM nodes.
///
/// Vector-typed nodes are handed to splitBinaryVectorOp. A scalar node is
/// expected to be f16 on a subtarget that has only the packed (v2f16)
/// minimum3/maximum3 instructions; it is rewritten as the same opcode on
/// v2f16 and the result is read back from element 0.
///
/// \param Op  The FMINIMUM or FMAXIMUM node to lower (two operands).
/// \param DAG The SelectionDAG used to build the replacement nodes.
/// \returns The replacement value, with the same value type as \p Op in the
///          scalar f16 case.
SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
6728+
SelectionDAG &DAG) const {
6729+
EVT VT = Op.getValueType();
6730+
// Vector inputs take the generic binary-op splitting path.
// NOTE(review): presumably this breaks the op into narrower pieces that are
// themselves legal -- confirm against splitBinaryVectorOp.
if (VT.isVector())
6731+
return splitBinaryVectorOp(Op, DAG);
6732+
6733+
// Past this point only scalar f16 is expected, and only on subtargets that
// have the packed f16 instructions but neither IEEE min/max nor the scalar
// f16 minimum3/maximum3 forms. Any other combination reaching here indicates
// the operation was marked Custom when it should not have been.
assert(!Subtarget->hasIEEEMinMax() && !Subtarget->hasMinimum3Maximum3F16() &&
6734+
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
6735+
"should not need to widen f16 minimum/maximum to v2f16");
6736+
6737+
// Widen f16 operation to v2f16
6738+
6739+
// fminimum f16:x, f16:y ->
6740+
// extract_vector_elt (fminimum (v2f16 (scalar_to_vector x))
6741+
// (v2f16 (scalar_to_vector y))), 0
6742+
SDLoc SL(Op);
6743+
// Each scalar operand is placed in a v2f16 via SCALAR_TO_VECTOR. Per the ISD
// opcode documentation only element 0 is defined by that node; that is
// sufficient here because only element 0 of the result is extracted below.
SDValue WideSrc0 =
6744+
DAG.getNode(ISD::SCALAR_TO_VECTOR, SL, MVT::v2f16, Op.getOperand(0));
6745+
SDValue WideSrc1 =
6746+
DAG.getNode(ISD::SCALAR_TO_VECTOR, SL, MVT::v2f16, Op.getOperand(1));
6747+
6748+
// Re-emit the original opcode (FMINIMUM or FMAXIMUM) on the widened type.
SDValue Widened =
6749+
DAG.getNode(Op.getOpcode(), SL, MVT::v2f16, WideSrc0, WideSrc1);
6750+
6751+
// Read the f16 result back out of element 0, using an i32 index constant.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::f16, Widened,
6752+
DAG.getConstant(0, SL, MVT::i32));
6753+
}
6754+
67216755
SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
67226756
bool IsStrict = Op.getOpcode() == ISD::STRICT_FLDEXP;
67236757
EVT VT = Op.getValueType();
@@ -13459,7 +13493,8 @@ static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc,
1345913493
case ISD::FMINIMUM:
1346013494
case ISD::FMAXIMUM:
1346113495
return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
13462-
(VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
13496+
(VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16()) ||
13497+
(VT == MVT::v2f16 && Subtarget.hasMinimum3Maximum3PKF16());
1346313498
case ISD::SMAX:
1346413499
case ISD::SMIN:
1346513500
case ISD::UMAX:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
146146
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
147147
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
148148
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
149+
SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
149150
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
150151
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
151152
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
145145
} // End SubtargetPredicate = HasVOP3PInsts
146146

147147
let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
148-
defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
149-
defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
148+
defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, AMDGPUfminimum3>;
149+
defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, AMDGPUfmaximum3>;
150150
}
151151

152152
// TODO: Make sure we're doing the right thing with denormals. Note

0 commit comments

Comments
 (0)