Skip to content

Commit e89c08b

Browse files
committed
Factor shift reducing combine logic into one function.
1 parent 216f7c0 commit e89c08b

File tree

1 file changed

+40
-155
lines changed

1 file changed

+40
-155
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 40 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -4067,15 +4067,22 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
40674067
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
40684068
}
40694069

4070-
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4071-
DAGCombinerInfo &DCI) const {
4072-
EVT VT = N->getValueType(0);
4073-
SDValue LHS = N->getOperand(0);
4074-
SDValue RHS = N->getOperand(1);
4075-
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4076-
SDLoc SL(N);
4077-
SelectionDAG &DAG = DCI.DAG;
4078-
4070+
// Part of the shift combines is to optimise for the case where it's possible
4071+
// to reduce e.g. shl64 to shl32 if shift range is [32, 63]. This
4072+
// transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4073+
// '&' is then elided by ISel. The vector code for this was being
4074+
// completely scalarised by the vector legalizer, but when v2i32 is
4075+
// legal the vector legalizer only partially scalarises the
4076+
// vector operations and the AND is not elided. This function
4077+
// scalarises the AND for this optimisation case.
4078+
static SDValue getShiftForReduction(unsigned ShiftOpc, SDValue LHS, SDValue RHS,
4079+
SelectionDAG &DAG) {
4080+
4081+
assert(
4082+
(ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
4083+
"Expected shift Opcode.");
4084+
4085+
SDLoc SL = SDLoc(RHS);
40794086
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
40804087
SDValue VAND = RHS.getOperand(0);
40814088
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
@@ -4084,15 +4091,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
40844091
SDValue LHSAND = VAND.getOperand(0);
40854092
SDValue RHSAND = VAND.getOperand(1);
40864093
if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4087-
// Part of shlcombine is to optimise for the case where its possible
4088-
// to reduce shl64 to shl32 if shift range is [63-32]. This
4089-
// transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4090-
// '&' is then elided by ISel. The vector code for this was being
4091-
// completely scalarised by the vector legalizer, but now v2i32 is
4092-
// made legal the vector legaliser only partially scalarises the
4093-
// vector operations and the and was not elided. This check enables us
4094-
// to locate and scalarise the v2i32 and and re-enable ISel to elide
4095-
// the and instruction.
40964094
ConstantSDNode *CANDL =
40974095
dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
40984096
ConstantSDNode *CANDR =
@@ -4106,19 +4104,33 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
41064104
LHSAND, Zero);
41074105
SDValue Hi =
41084106
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4109-
SDValue LoAnd =
4110-
DAG.getNode(ISD::AND, SL, MVT::i32, Lo, RHSAND->getOperand(0));
4111-
SDValue HiAnd =
4112-
DAG.getNode(ISD::AND, SL, MVT::i32, Hi, RHSAND->getOperand(0));
4107+
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4108+
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4109+
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
41134110
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
41144111
if (AndIndex == 0 || AndIndex == 1)
4115-
return DAG.getNode(ISD::SHL, SL, MVT::i32, Trunc,
4116-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4112+
return DAG.getNode(ShiftOpc, SL, MVT::i32, Trunc,
4113+
AndIndex == 0 ? LoAnd : HiAnd,
4114+
RHS->getFlags());
41174115
}
41184116
}
41194117
}
41204118
}
41214119
}
4120+
return SDValue();
4121+
}
4122+
4123+
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4124+
DAGCombinerInfo &DCI) const {
4125+
EVT VT = N->getValueType(0);
4126+
SDValue LHS = N->getOperand(0);
4127+
SDValue RHS = N->getOperand(1);
4128+
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4129+
SDLoc SL(N);
4130+
SelectionDAG &DAG = DCI.DAG;
4131+
4132+
if(SDValue SS = getShiftForReduction(ISD::SHL, LHS, RHS, DAG))
4133+
return SS;
41224134

41234135
unsigned RHSVal;
41244136
if (CRHS) {
@@ -4220,48 +4232,8 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
42204232
SelectionDAG &DAG = DCI.DAG;
42214233
SDLoc SL(N);
42224234

4223-
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4224-
SDValue VAND = RHS.getOperand(0);
4225-
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4226-
uint64_t AndIndex = RHS->getConstantOperandVal(1);
4227-
if (VAND->getOpcode() == ISD::AND && CRRHS) {
4228-
SDValue LHSAND = VAND.getOperand(0);
4229-
SDValue RHSAND = VAND.getOperand(1);
4230-
if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4231-
// Part of sracombine is to optimise for the case where its possible
4232-
// to reduce shl64 to shl32 if shift range is [63-32]. This
4233-
// transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4234-
// '&' is then elided by ISel. The vector code for this was being
4235-
// completely scalarised by the vector legalizer, but now v2i32 is
4236-
// made legal the vector legaliser only partially scalarises the
4237-
// vector operations and the and was not elided. This check enables us
4238-
// to locate and scalarise the v2i32 and and re-enable ISel to elide
4239-
// the and instruction.
4240-
ConstantSDNode *CANDL =
4241-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4242-
ConstantSDNode *CANDR =
4243-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4244-
if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4245-
RHSAND->getConstantOperandVal(1) == 0x1f) {
4246-
// Get the non-const AND operands and produce scalar AND
4247-
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4248-
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4249-
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4250-
LHSAND, Zero);
4251-
SDValue Hi =
4252-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4253-
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4254-
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4255-
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4256-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4257-
if (AndIndex == 0 || AndIndex == 1)
4258-
return DAG.getNode(ISD::SRA, SL, MVT::i32, Trunc,
4259-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4260-
}
4261-
}
4262-
}
4263-
}
4264-
}
4235+
if(SDValue SS = getShiftForReduction(ISD::SRA, LHS, RHS, DAG))
4236+
return SS;
42654237

42664238
if (VT.getScalarType() != MVT::i64)
42674239
return SDValue();
@@ -4355,52 +4327,6 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
43554327
return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
43564328
}
43574329

4358-
// static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4359-
// SDLoc SL = SDLoc(RHS);
4360-
// if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4361-
// SDValue VAND = RHS.getOperand(0);
4362-
// if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4363-
// uint64_t AndIndex = RHS->getConstantOperandVal(1);
4364-
// if (VAND->getOpcode() == ISD::AND && CRRHS) {
4365-
// SDValue LHSAND = VAND.getOperand(0);
4366-
// SDValue RHSAND = VAND.getOperand(1);
4367-
// if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4368-
// // Part of srlcombine is to optimise for the case where its possible
4369-
// // to reduce shl64 to shl32 if shift range is [63-32]. This
4370-
// // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4371-
// // '&' is then elided by ISel. The vector code for this was being
4372-
// // completely scalarised by the vector legalizer, but now v2i32 is
4373-
// // made legal the vector legaliser only partially scalarises the
4374-
// // vector operations and the and was not elided. This check enables us
4375-
// // to locate and scalarise the v2i32 and and re-enable ISel to elide
4376-
// // the and instruction.
4377-
// ConstantSDNode *CANDL =
4378-
// dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4379-
// ConstantSDNode *CANDR =
4380-
// dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4381-
// if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4382-
// RHSAND->getConstantOperandVal(1) == 0x1f) {
4383-
// // Get the non-const AND operands and produce scalar AND
4384-
// const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4385-
// const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4386-
// SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4387-
// LHSAND, Zero);
4388-
// SDValue Hi =
4389-
// DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4390-
// SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4391-
// SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4392-
// SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4393-
// SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4394-
// if (AndIndex == 0 || AndIndex == 1)
4395-
// return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4396-
// AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
4397-
// }
4398-
// }
4399-
// }
4400-
// }
4401-
// }
4402-
// return SDValue();
4403-
// }
44044330

44054331
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
44064332
DAGCombinerInfo &DCI) const {
@@ -4412,49 +4338,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
44124338
SDLoc SL(N);
44134339
unsigned RHSVal;
44144340

4415-
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4416-
SDValue VAND = RHS.getOperand(0);
4417-
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4418-
uint64_t AndIndex = RHS->getConstantOperandVal(1);
4419-
if (VAND->getOpcode() == ISD::AND && CRRHS) {
4420-
SDValue LHSAND = VAND.getOperand(0);
4421-
SDValue RHSAND = VAND.getOperand(1);
4422-
if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4423-
// Part of srlcombine is to optimise for the case where its possible
4424-
// to reduce shl64 to shl32 if shift range is [63-32]. This
4425-
// transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4426-
// '&' is then elided by ISel. The vector code for this was being
4427-
// completely scalarised by the vector legalizer, but now v2i32 is
4428-
// made legal the vector legaliser only partially scalarises the
4429-
// vector operations and the and was not elided. This check enables us
4430-
// to locate and scalarise the v2i32 and and re-enable ISel to elide
4431-
// the and instruction.
4432-
ConstantSDNode *CANDL =
4433-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4434-
ConstantSDNode *CANDR =
4435-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4436-
if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4437-
RHSAND->getConstantOperandVal(1) == 0x1f) {
4438-
// Get the non-const AND operands and produce scalar AND
4439-
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4440-
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4441-
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4442-
LHSAND, Zero);
4443-
SDValue Hi =
4444-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4445-
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4446-
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4447-
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4448-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4449-
if (AndIndex == 0 || AndIndex == 1)
4450-
return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4451-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4452-
}
4453-
}
4454-
}
4455-
}
4456-
}
4457-
4341+
if(SDValue SS = getShiftForReduction(ISD::SRL, LHS, RHS, DAG))
4342+
return SS;
44584343

44594344
if (CRHS) {
44604345
RHSVal = CRHS->getZExtValue();

0 commit comments

Comments
 (0)