Skip to content

Commit a0f3dc9

Browse files
authored
Added dagCombine for selectWithConstants, along with using the target hook for another selectWithConstants case (llvm#973)
CQE testing has been done!
2 parents 0449767 + 751f45b commit a0f3dc9

27 files changed

+7689
-3826
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27692,7 +27692,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
2769227692
if ((Fold || Swap) &&
2769327693
TLI.getBooleanContents(CmpOpVT) ==
2769427694
TargetLowering::ZeroOrOneBooleanContent &&
27695-
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27695+
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
27696+
TLI.convertSelectOfConstantsToMath(VT)) {
2769627697

2769727698
if (Swap) {
2769827699
CC = ISD::getSetCCInverse(CC, CmpOpVT);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
918918
ISD::FADD,
919919
ISD::FSUB,
920920
ISD::FDIV,
921+
ISD::FMUL,
921922
ISD::FMINNUM,
922923
ISD::FMAXNUM,
923924
ISD::FMINNUM_IEEE,
@@ -14764,6 +14765,66 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
1476414765
return SDValue();
1476514766
}
1476614767

14768+
// Target-specific DAG combine for ISD::FMUL nodes.
//
// Rewrites a multiply whose second operand is a single-use SELECT between two
// floating-point constants that are exact powers of two with the same sign
// into an FLDEXP whose exponent is an i32 SELECT on the original condition.
//
// \param N   the FMUL node being combined.
// \param DCI combiner state; only its SelectionDAG is used here.
// \returns the replacement FLDEXP node, or an empty SDValue when the pattern
//          does not match (no change is made in that case).
SDValue SITargetLowering::performFMulCombine(SDNode *N,
14769+
DAGCombinerInfo &DCI) const {
14770+
SelectionDAG &DAG = DCI.DAG;
14771+
EVT VT = N->getValueType(0);
14772+
EVT ScalarVT = VT.getScalarType();
14773+
// Integer type with the same element count as VT, used for the exponent
// select built below.
EVT IntVT = VT.changeElementType(MVT::i32);
14774+
14775+
SDValue LHS = N->getOperand(0);
14776+
SDValue RHS = N->getOperand(1);
14777+
14778+
// It is cheaper to realize i32 inline constants as compared against
14779+
// materializing f16 or f64 (or even non-inline f32) values,
14780+
// possible via ldexp usage, as shown below :
14781+
//
14782+
// Given : A = 2^a & B = 2^b ; where a and b are integers.
14783+
// fmul x, (select y, A, B) -> ldexp( x, (select i32 y, a, b) )
14784+
// fmul x, (select y, -A, -B) -> ldexp( (fneg x), (select i32 y, a, b) )
14785+
// NOTE(review): only operand 1 is inspected for the SELECT; a select in
// operand 0 is not matched by this code — confirm that is intended.
if ((ScalarVT == MVT::f64 || ScalarVT == MVT::f32 || ScalarVT == MVT::f16) &&
14786+
(RHS.hasOneUse() && RHS.getOpcode() == ISD::SELECT)) {
14787+
// Both select arms must be recognised by isConstOrConstSplatFP; otherwise
// there is nothing to fold.
const ConstantFPSDNode *TrueNode = isConstOrConstSplatFP(RHS.getOperand(1));
14788+
if (!TrueNode)
14789+
return SDValue();
14790+
const ConstantFPSDNode *FalseNode =
14791+
isConstOrConstSplatFP(RHS.getOperand(2));
14792+
if (!FalseNode)
14793+
return SDValue();
14794+
14795+
// A single FNEG is applied to x below based on the true arm's sign, so the
// two arms must agree in sign.
if (TrueNode->isNegative() != FalseNode->isNegative())
14796+
return SDValue();
14797+
14798+
// For f32, only non-inline constants should be transformed.
14799+
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
14800+
// The bail-out fires only when BOTH arms are inline constants; a mix of
// inline and non-inline f32 arms is still transformed.
if (ScalarVT == MVT::f32 &&
14801+
TII->isInlineConstant(TrueNode->getValueAPF()) &&
14802+
TII->isInlineConstant(FalseNode->getValueAPF()))
14803+
return SDValue();
14804+
14805+
// INT_MIN from getExactLog2Abs is treated as the failure result, i.e. the
// constant's magnitude cannot be expressed as an exact integer power of two.
int TrueNodeExpVal = TrueNode->getValueAPF().getExactLog2Abs();
14806+
if (TrueNodeExpVal == INT_MIN)
14807+
return SDValue();
14808+
int FalseNodeExpVal = FalseNode->getValueAPF().getExactLog2Abs();
14809+
if (FalseNodeExpVal == INT_MIN)
14810+
return SDValue();
14811+
14812+
SDLoc SL(N);
14813+
// Integer select over the two exponents, reusing the original select's
// condition operand.
SDValue SelectNode = DAG.getNode(
14814+
ISD::SELECT, SL, IntVT, RHS.getOperand(0),
14815+
DAG.getSignedConstant(TrueNodeExpVal, SL, IntVT, false, false),
14816+
DAG.getSignedConstant(FalseNodeExpVal, SL, IntVT, false, false));
14817+
14818+
// Negative constants: move the sign onto x. The FNEG carries LHS's own
// flags, while the final FLDEXP carries the flags of the original FMUL.
LHS = TrueNode->isNegative()
14819+
? DAG.getNode(ISD::FNEG, SL, VT, LHS, LHS->getFlags())
14820+
: LHS;
14821+
14822+
return DAG.getNode(ISD::FLDEXP, SL, VT, LHS, SelectNode, N->getFlags());
14823+
}
14824+
14825+
// Pattern not matched: leave the FMUL untouched.
return SDValue();
14826+
}
14827+
1476714828
SDValue SITargetLowering::performFMACombine(SDNode *N,
1476814829
DAGCombinerInfo &DCI) const {
1476914830
SelectionDAG &DAG = DCI.DAG;
@@ -15029,6 +15090,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1502915090
return performFSubCombine(N, DCI);
1503015091
case ISD::FDIV:
1503115092
return performFDivCombine(N, DCI);
15093+
case ISD::FMUL:
15094+
return performFMulCombine(N, DCI);
1503215095
case ISD::SETCC:
1503315096
return performSetCCCombine(N, DCI);
1503415097
case ISD::FMAXNUM:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
218218
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
219219
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220220
SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
221+
SDValue performFMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
221222
SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
222223
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
223224
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;

llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1919
; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
2020

2121
; GCN-PROMOTE: s_cmp_eq_u32 [[IN]], 1
22-
; GCN-PROMOTE-NEXT: s_cselect_b64 vcc, -1, 0
23-
; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
22+
; GCN-PROMOTE-NEXT: s_cselect_b32 [[SCC:s[0-9]+]], 1, 0
23+
; GCN-PROMOTE-NEXT: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[SCC]], v0
2424

2525
; GCN: buffer_store_dword [[RESULT]]
2626
define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {

0 commit comments

Comments
 (0)