Skip to content

Commit b4f391c

Browse files
committed
Factor shift reducing combine logic into one function.
1 parent 7e9d17a commit b4f391c

File tree

1 file changed

+40
-155
lines changed

1 file changed

+40
-155
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 40 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -4069,15 +4069,22 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
40694069
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
40704070
}
40714071

4072-
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4073-
DAGCombinerInfo &DCI) const {
4074-
EVT VT = N->getValueType(0);
4075-
SDValue LHS = N->getOperand(0);
4076-
SDValue RHS = N->getOperand(1);
4077-
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4078-
SDLoc SL(N);
4079-
SelectionDAG &DAG = DCI.DAG;
4080-
4072+
// Part of the shift combines is to optimise for the case where it's possible
4073+
// to reduce e.g. shl64 to shl32 if the shift range is [63-32]. This
4074+
// transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4075+
// '&' is then elided by ISel. The vector code for this was being
4076+
// completely scalarised by the vector legalizer, but when v2i32 is
4077+
// legal the vector legaliser only partially scalarises the
4078+
// vector operations and the AND is not elided. This function
4079+
// scalarises the AND for this optimisation case.
4080+
static SDValue getShiftForReduction(unsigned ShiftOpc, SDValue LHS, SDValue RHS,
4081+
SelectionDAG &DAG) {
4082+
4083+
assert(
4084+
(ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
4085+
"Expected shift Opcode.");
4086+
4087+
SDLoc SL = SDLoc(RHS);
40814088
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
40824089
SDValue VAND = RHS.getOperand(0);
40834090
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
@@ -4086,15 +4093,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
40864093
SDValue LHSAND = VAND.getOperand(0);
40874094
SDValue RHSAND = VAND.getOperand(1);
40884095
if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4089-
// Part of shlcombine is to optimise for the case where its possible
4090-
// to reduce shl64 to shl32 if shift range is [63-32]. This
4091-
// transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4092-
// '&' is then elided by ISel. The vector code for this was being
4093-
// completely scalarised by the vector legalizer, but now v2i32 is
4094-
// made legal the vector legaliser only partially scalarises the
4095-
// vector operations and the and was not elided. This check enables us
4096-
// to locate and scalarise the v2i32 and and re-enable ISel to elide
4097-
// the and instruction.
40984096
ConstantSDNode *CANDL =
40994097
dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
41004098
ConstantSDNode *CANDR =
@@ -4108,19 +4106,33 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
41084106
LHSAND, Zero);
41094107
SDValue Hi =
41104108
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4111-
SDValue LoAnd =
4112-
DAG.getNode(ISD::AND, SL, MVT::i32, Lo, RHSAND->getOperand(0));
4113-
SDValue HiAnd =
4114-
DAG.getNode(ISD::AND, SL, MVT::i32, Hi, RHSAND->getOperand(0));
4109+
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4110+
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4111+
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
41154112
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
41164113
if (AndIndex == 0 || AndIndex == 1)
4117-
return DAG.getNode(ISD::SHL, SL, MVT::i32, Trunc,
4118-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4114+
return DAG.getNode(ShiftOpc, SL, MVT::i32, Trunc,
4115+
AndIndex == 0 ? LoAnd : HiAnd,
4116+
RHS->getFlags());
41194117
}
41204118
}
41214119
}
41224120
}
41234121
}
4122+
return SDValue();
4123+
}
4124+
4125+
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4126+
DAGCombinerInfo &DCI) const {
4127+
EVT VT = N->getValueType(0);
4128+
SDValue LHS = N->getOperand(0);
4129+
SDValue RHS = N->getOperand(1);
4130+
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4131+
SDLoc SL(N);
4132+
SelectionDAG &DAG = DCI.DAG;
4133+
4134+
if(SDValue SS = getShiftForReduction(ISD::SHL, LHS, RHS, DAG))
4135+
return SS;
41244136

41254137
unsigned RHSVal;
41264138
if (CRHS) {
@@ -4222,48 +4234,8 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
42224234
SelectionDAG &DAG = DCI.DAG;
42234235
SDLoc SL(N);
42244236

4225-
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4226-
SDValue VAND = RHS.getOperand(0);
4227-
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4228-
uint64_t AndIndex = RHS->getConstantOperandVal(1);
4229-
if (VAND->getOpcode() == ISD::AND && CRRHS) {
4230-
SDValue LHSAND = VAND.getOperand(0);
4231-
SDValue RHSAND = VAND.getOperand(1);
4232-
if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4233-
// Part of sracombine is to optimise for the case where its possible
4234-
// to reduce shl64 to shl32 if shift range is [63-32]. This
4235-
// transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4236-
// '&' is then elided by ISel. The vector code for this was being
4237-
// completely scalarised by the vector legalizer, but now v2i32 is
4238-
// made legal the vector legaliser only partially scalarises the
4239-
// vector operations and the and was not elided. This check enables us
4240-
// to locate and scalarise the v2i32 and and re-enable ISel to elide
4241-
// the and instruction.
4242-
ConstantSDNode *CANDL =
4243-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4244-
ConstantSDNode *CANDR =
4245-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4246-
if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4247-
RHSAND->getConstantOperandVal(1) == 0x1f) {
4248-
// Get the non-const AND operands and produce scalar AND
4249-
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4250-
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4251-
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4252-
LHSAND, Zero);
4253-
SDValue Hi =
4254-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4255-
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4256-
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4257-
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4258-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4259-
if (AndIndex == 0 || AndIndex == 1)
4260-
return DAG.getNode(ISD::SRA, SL, MVT::i32, Trunc,
4261-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4262-
}
4263-
}
4264-
}
4265-
}
4266-
}
4237+
if(SDValue SS = getShiftForReduction(ISD::SRA, LHS, RHS, DAG))
4238+
return SS;
42674239

42684240
if (VT.getScalarType() != MVT::i64)
42694241
return SDValue();
@@ -4357,52 +4329,6 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
43574329
return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
43584330
}
43594331

4360-
// static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4361-
// SDLoc SL = SDLoc(RHS);
4362-
// if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4363-
// SDValue VAND = RHS.getOperand(0);
4364-
// if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4365-
// uint64_t AndIndex = RHS->getConstantOperandVal(1);
4366-
// if (VAND->getOpcode() == ISD::AND && CRRHS) {
4367-
// SDValue LHSAND = VAND.getOperand(0);
4368-
// SDValue RHSAND = VAND.getOperand(1);
4369-
// if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4370-
// // Part of srlcombine is to optimise for the case where its possible
4371-
// // to reduce shl64 to shl32 if shift range is [63-32]. This
4372-
// // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4373-
// // '&' is then elided by ISel. The vector code for this was being
4374-
// // completely scalarised by the vector legalizer, but now v2i32 is
4375-
// // made legal the vector legaliser only partially scalarises the
4376-
// // vector operations and the and was not elided. This check enables us
4377-
// // to locate and scalarise the v2i32 and and re-enable ISel to elide
4378-
// // the and instruction.
4379-
// ConstantSDNode *CANDL =
4380-
// dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4381-
// ConstantSDNode *CANDR =
4382-
// dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4383-
// if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4384-
// RHSAND->getConstantOperandVal(1) == 0x1f) {
4385-
// // Get the non-const AND operands and produce scalar AND
4386-
// const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4387-
// const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4388-
// SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4389-
// LHSAND, Zero);
4390-
// SDValue Hi =
4391-
// DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4392-
// SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4393-
// SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4394-
// SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4395-
// SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4396-
// if (AndIndex == 0 || AndIndex == 1)
4397-
// return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4398-
// AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
4399-
// }
4400-
// }
4401-
// }
4402-
// }
4403-
// }
4404-
// return SDValue();
4405-
// }
44064332

44074333
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
44084334
DAGCombinerInfo &DCI) const {
@@ -4414,49 +4340,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
44144340
SDLoc SL(N);
44154341
unsigned RHSVal;
44164342

4417-
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4418-
SDValue VAND = RHS.getOperand(0);
4419-
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4420-
uint64_t AndIndex = RHS->getConstantOperandVal(1);
4421-
if (VAND->getOpcode() == ISD::AND && CRRHS) {
4422-
SDValue LHSAND = VAND.getOperand(0);
4423-
SDValue RHSAND = VAND.getOperand(1);
4424-
if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4425-
// Part of srlcombine is to optimise for the case where its possible
4426-
// to reduce shl64 to shl32 if shift range is [63-32]. This
4427-
// transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4428-
// '&' is then elided by ISel. The vector code for this was being
4429-
// completely scalarised by the vector legalizer, but now v2i32 is
4430-
// made legal the vector legaliser only partially scalarises the
4431-
// vector operations and the and was not elided. This check enables us
4432-
// to locate and scalarise the v2i32 and and re-enable ISel to elide
4433-
// the and instruction.
4434-
ConstantSDNode *CANDL =
4435-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4436-
ConstantSDNode *CANDR =
4437-
dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4438-
if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4439-
RHSAND->getConstantOperandVal(1) == 0x1f) {
4440-
// Get the non-const AND operands and produce scalar AND
4441-
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4442-
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4443-
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4444-
LHSAND, Zero);
4445-
SDValue Hi =
4446-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4447-
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4448-
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4449-
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4450-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4451-
if (AndIndex == 0 || AndIndex == 1)
4452-
return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4453-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4454-
}
4455-
}
4456-
}
4457-
}
4458-
}
4459-
4343+
if(SDValue SS = getShiftForReduction(ISD::SRL, LHS, RHS, DAG))
4344+
return SS;
44604345

44614346
if (CRHS) {
44624347
RHSVal = CRHS->getZExtValue();

0 commit comments

Comments
 (0)