Skip to content

Commit 573adfe

Browse files
committed
Fix regressions in source-modifier (SrcMod) generation for integer selects
when v2i32 is made legal for or/xor/and. Completes the v2i32 fix for VOP SrcMod placement.
1 parent 5a97e1c commit 573adfe

File tree

6 files changed

+255
-179
lines changed

6 files changed

+255
-179
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3059,36 +3059,62 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
30593059
Src = Src.getOperand(0);
30603060
}
30613061

3062+
// v2i32 xor/or/and are legal. A vselect using these instructions as operands
3063+
// is scalarised into two selects with EXTRACT_VECTOR_ELT operands. Peek
3064+
// through this extract if possible.
3065+
auto getVectorBitWiseOp = [](SDValue S) -> SDValue {
3066+
if (S->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3067+
SDValue VecOp = S->getOperand(0);
3068+
if (VecOp.getOpcode() == ISD::XOR || VecOp.getOpcode() == ISD::AND ||
3069+
VecOp.getOpcode() == ISD::OR)
3070+
return VecOp;
3071+
}
3072+
return SDValue();
3073+
};
3074+
3075+
SDValue Vec = getVectorBitWiseOp(Src);
3076+
SDValue BWSrc = Vec ? Vec : Src;
30623077
// Convert various sign-bit masks to src mods. Currently disabled for 16-bit
30633078
// types as the codegen replaces the operand without adding a srcmod.
30643079
// This is intentionally finding the cases where we are performing float neg
30653080
// and abs on int types, the goal is not to obtain two's complement neg or
30663081
// abs.
30673082
// TODO: Add 16-bit support.
3068-
unsigned Opc = Src->getOpcode();
3083+
unsigned Opc = Vec ? Vec->getOpcode() : Src->getOpcode();
30693084
EVT VT = Src.getValueType();
30703085
if ((Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR) ||
30713086
(VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
30723087
return true;
30733088

3074-
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1));
3089+
ConstantSDNode *CRHS =
3090+
isConstOrConstSplat(Vec ? Vec->getOperand(1) : Src->getOperand(1));
30753091
if (!CRHS)
30763092
return true;
30773093

3094+
auto ReplaceSrc = [&]() -> SDValue {
3095+
if (Vec) {
3096+
SDValue LHS = BWSrc->getOperand(0);
3097+
SDValue Index = Src->getOperand(1);
3098+
return Src = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Src),
3099+
Src.getValueType(), LHS, Index);
3100+
}
3101+
return Src = BWSrc.getOperand(0);
3102+
};
3103+
30783104
// Recognise (xor a, 0x80000000) as NEG SrcMod.
30793105
// Recognise (and a, 0x7fffffff) as ABS SrcMod.
30803106
// Recognise (or a, 0x80000000) as NEG+ABS SrcModifiers.
30813107
if (Opc == ISD::XOR && CRHS->getAPIntValue().isSignMask()) {
30823108
Mods |= SISrcMods::NEG;
3083-
Src = Src.getOperand(0);
3109+
Src = ReplaceSrc();
30843110
} else if (Opc == ISD::AND && AllowAbs &&
30853111
CRHS->getAPIntValue().isMaxSignedValue()) {
30863112
Mods |= SISrcMods::ABS;
3087-
Src = Src.getOperand(0);
3113+
Src = ReplaceSrc();
30883114
} else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
30893115
Mods |= SISrcMods::ABS;
30903116
Mods |= SISrcMods::NEG;
3091-
Src = Src.getOperand(0);
3117+
Src = ReplaceSrc();
30923118
}
30933119

30943120
return true;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4251,12 +4251,12 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
42514251
(ElementType.getSizeInBits() - 1)) {
42524252
ShiftAmt = ShiftFullAmt;
42534253
} else {
4254-
SDValue truncShiftAmt = DAG.getNode(ISD::TRUNCATE, SL, TargetType, RHS);
4254+
SDValue TruncShiftAmt = DAG.getNode(ISD::TRUNCATE, SL, TargetType, RHS);
42554255
const SDValue ShiftMask =
42564256
DAG.getConstant(TargetScalarType.getSizeInBits() - 1, SL, TargetType);
42574257
// This AND instruction will clamp out of bounds shift values.
42584258
// It will also be removed during later instruction selection.
4259-
ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, truncShiftAmt, ShiftMask);
4259+
ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);
42604260
}
42614261

42624262
EVT ConcatType;
@@ -4313,16 +4313,8 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
43134313
return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
43144314
}
43154315

4316-
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
4317-
DAGCombinerInfo &DCI) const {
4318-
SDValue RHS = N->getOperand(1);
4319-
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4320-
EVT VT = N->getValueType(0);
4321-
SDValue LHS = N->getOperand(0);
4322-
SelectionDAG &DAG = DCI.DAG;
4323-
SDLoc SL(N);
4324-
unsigned RHSVal;
4325-
4316+
static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4317+
SDLoc SL = SDLoc(RHS);
43264318
if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
43274319
SDValue VAND = RHS.getOperand(0);
43284320
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
@@ -4359,12 +4351,26 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
43594351
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
43604352
if (AndIndex == 0 || AndIndex == 1)
43614353
return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4362-
AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
4354+
AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
43634355
}
43644356
}
43654357
}
43664358
}
43674359
}
4360+
return SDValue();
4361+
}
4362+
4363+
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
4364+
DAGCombinerInfo &DCI) const {
4365+
SDValue RHS = N->getOperand(1);
4366+
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4367+
EVT VT = N->getValueType(0);
4368+
SDValue LHS = N->getOperand(0);
4369+
SelectionDAG &DAG = DCI.DAG;
4370+
SDLoc SL(N);
4371+
unsigned RHSVal;
4372+
4373+
43684374

43694375
if (CRHS) {
43704376
RHSVal = CRHS->getZExtValue();

0 commit comments

Comments
 (0)