Skip to content

Commit f861389

Browse files
committed
Work to fix regressions in integer select srcmod generation when v2i32
is made legal for or/xor/and. Complete fix of v2i32 in VOP SrcMod placement.
1 parent ab55852 commit f861389

File tree

5 files changed

+241
-169
lines changed

5 files changed

+241
-169
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3036,36 +3036,62 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
30363036
Src = Src.getOperand(0);
30373037
}
30383038

3039+
// v2i32 xor/or/and are legal. A vselect using these instructions as operands
3040+
// is scalarised into two selects with EXTRACT_VECTOR_ELT operands. Peek
3041+
// through this extract if possible.
3042+
auto getVectorBitWiseOp = [](SDValue S) -> SDValue {
3043+
if (S->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3044+
SDValue VecOp = S->getOperand(0);
3045+
if (VecOp.getOpcode() == ISD::XOR || VecOp.getOpcode() == ISD::AND ||
3046+
VecOp.getOpcode() == ISD::OR)
3047+
return VecOp;
3048+
}
3049+
return SDValue();
3050+
};
3051+
3052+
SDValue Vec = getVectorBitWiseOp(Src);
3053+
SDValue BWSrc = Vec ? Vec : Src;
30393054
// Convert various sign-bit masks to src mods. Currently disabled for 16-bit
30403055
// types as the codegen replaces the operand without adding a srcmod.
30413056
// This is intentionally finding the cases where we are performing float neg
30423057
// and abs on int types, the goal is not to obtain two's complement neg or
30433058
// abs.
30443059
// TODO: Add 16-bit support.
3045-
unsigned Opc = Src->getOpcode();
3060+
unsigned Opc = Vec ? Vec->getOpcode() : Src->getOpcode();
30463061
EVT VT = Src.getValueType();
30473062
if ((Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR) ||
30483063
(VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
30493064
return true;
30503065

3051-
ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1));
3066+
ConstantSDNode *CRHS =
3067+
isConstOrConstSplat(Vec ? Vec->getOperand(1) : Src->getOperand(1));
30523068
if (!CRHS)
30533069
return true;
30543070

3071+
auto ReplaceSrc = [&]() -> SDValue {
3072+
if (Vec) {
3073+
SDValue LHS = BWSrc->getOperand(0);
3074+
SDValue Index = Src->getOperand(1);
3075+
return Src = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Src),
3076+
Src.getValueType(), LHS, Index);
3077+
}
3078+
return Src = BWSrc.getOperand(0);
3079+
};
3080+
30553081
// Recognise (xor a, 0x80000000) as NEG SrcMod.
30563082
// Recognise (and a, 0x7fffffff) as ABS SrcMod.
30573083
// Recognise (or a, 0x80000000) as NEG+ABS SrcModifiers.
30583084
if (Opc == ISD::XOR && CRHS->getAPIntValue().isSignMask()) {
30593085
Mods |= SISrcMods::NEG;
3060-
Src = Src.getOperand(0);
3086+
Src = ReplaceSrc();
30613087
} else if (Opc == ISD::AND && AllowAbs &&
30623088
CRHS->getAPIntValue().isMaxSignedValue()) {
30633089
Mods |= SISrcMods::ABS;
3064-
Src = Src.getOperand(0);
3090+
Src = ReplaceSrc();
30653091
} else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
30663092
Mods |= SISrcMods::ABS;
30673093
Mods |= SISrcMods::NEG;
3068-
Src = Src.getOperand(0);
3094+
Src = ReplaceSrc();
30693095
}
30703096

30713097
return true;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 162 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -13331,43 +13331,31 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
1333113331
SDValue LHS = N->getOperand(0);
1333213332
SDValue RHS = N->getOperand(1);
1333313333

13334-
// Fold the fneg of a vselect into the v2 vselect operands.
13335-
// xor (vselect c, a, b), 0x80000000 ->
13336-
// bitcast (vselect c, (fneg (bitcast a)), (fneg (bitcast b)))
13337-
if (VT == MVT::v2i32 && LHS.getNumOperands() > 1) {
13338-
13339-
const ConstantSDNode *CRHS0 = dyn_cast<ConstantSDNode>(RHS.getOperand(0));
13340-
const ConstantSDNode *CRHS1 = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
13341-
SDValue LHS_0 = LHS.getOperand(0);
13342-
SDValue LHS_1 = LHS.getOperand(1);
13343-
13344-
if (LHS.getOpcode() == ISD::VSELECT && CRHS0 &&
13345-
CRHS0->getAPIntValue().isSignMask() &&
13346-
shouldFoldFNegIntoSrc(N, LHS_0) && CRHS1 &&
13347-
CRHS1->getAPIntValue().isSignMask() &&
13348-
shouldFoldFNegIntoSrc(N, LHS_1)) {
13349-
13350-
SDLoc DL(N);
13351-
SDValue CastLHS =
13352-
DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(1));
13353-
SDValue CastRHS =
13354-
DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(2));
13355-
SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastLHS);
13356-
SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastRHS);
13357-
SDValue NewSelect = DAG.getNode(ISD::VSELECT, DL, MVT::v2f32,
13358-
LHS->getOperand(0), FNegLHS, FNegRHS);
13359-
return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect);
13360-
}
13361-
}
13362-
13363-
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
13334+
const ConstantSDNode *CRHS = isConstOrConstSplat(RHS);
1336413335

1336513336
if (CRHS && VT == MVT::i64) {
1336613337
if (SDValue Split =
1336713338
splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
1336813339
return Split;
1336913340
}
1337013341

13342+
// v2i32 (xor (vselect cc, x, y), K) ->
13343+
// (v2i32 vselect cc, (xor x, K), (xor y, K)). This enables the xor to be
13344+
// replaced with source modifiers when the select is lowered to CNDMASK.
13345+
// TODO REMOVE: prevents regressions in fneg-modifier-casting.ll
13346+
unsigned Opc = LHS.getOpcode();
13347+
if(((Opc == ISD::VSELECT && VT==MVT::v2i32) || (Opc == ISD::SELECT && VT==MVT::i64)) && CRHS && CRHS->getAPIntValue().isSignMask()) {
13348+
SDValue CC = LHS->getOperand(0);
13349+
SDValue TRUE = LHS->getOperand(1);
13350+
SDValue FALSE = LHS->getOperand(2);
13351+
SDValue XTrue = DAG.getNode(ISD::XOR, SDLoc(N), VT, TRUE, RHS);
13352+
SDValue XFalse = DAG.getNode(ISD::XOR, SDLoc(N), VT, FALSE, RHS);
13353+
SDValue XSelect = DAG.getNode(ISD::VSELECT, SDLoc(N), VT, CC, XTrue, XFalse);
13354+
return XSelect;
13355+
}
13356+
13357+
13358+
1337113359
// Make sure to apply the 64-bit constant splitting fold before trying to fold
1337213360
// fneg-like xors into 64-bit select.
1337313361
if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
@@ -14332,125 +14320,165 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
1433214320
EltSize, NumElem, Idx->isDivergent(), getSubtarget());
1433314321
}
1433414322

14335-
SDValue
14336-
SITargetLowering::performExtractVectorEltCombine(SDNode *N,
14337-
DAGCombinerInfo &DCI) const {
14338-
SDValue Vec = N->getOperand(0);
14339-
SelectionDAG &DAG = DCI.DAG;
14323+
// SDValue
14324+
// SITargetLowering::performBuildVectorCombine(SDNode *N,
14325+
// DAGCombinerInfo &DCI) const {
14326+
// // if (N->use_empty())
14327+
// // return SDValue();
1434014328

14341-
EVT VecVT = Vec.getValueType();
14342-
EVT VecEltVT = VecVT.getVectorElementType();
14343-
EVT ResVT = N->getValueType(0);
14329+
// // if(!N->getValueType(0).isFloatingPoint())
14330+
// // return SDValue();
1434414331

14345-
unsigned VecSize = VecVT.getSizeInBits();
14346-
unsigned VecEltSize = VecEltVT.getSizeInBits();
14332+
// // SelectionDAG &DAG = DCI.DAG;
1434714333

14348-
if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
14349-
allUsesHaveSourceMods(N)) {
14350-
SDLoc SL(N);
14351-
SDValue Idx = N->getOperand(1);
14352-
SDValue Elt =
14353-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx);
14354-
return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
14355-
}
14356-
14357-
// ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
14358-
// =>
14359-
// Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
14360-
// Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
14361-
// ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
14362-
if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
14363-
SDLoc SL(N);
14364-
SDValue Idx = N->getOperand(1);
14365-
unsigned Opc = Vec.getOpcode();
14334+
// // // Iterate the operands. Check if source modifier. If so, propagate the
14335+
// // source
14336+
// // // modifier to the user and the srcmod from the BUILD_VECTOR element.
14337+
// // for (unsigned I = 0; I < N->getNumOperands(); I++) {
14338+
// // SDValue E = N->getOperand(I);
14339+
// // if (E->getOpcode() != ISD::FNEG && E->getOpcode() != ISD::ABS)
14340+
// // continue;
1436614341

14367-
switch (Opc) {
14368-
default:
14369-
break;
14370-
// TODO: Support other binary operations.
14371-
case ISD::FADD:
14372-
case ISD::FSUB:
14373-
case ISD::FMUL:
14374-
case ISD::ADD:
14375-
case ISD::UMIN:
14376-
case ISD::UMAX:
14377-
case ISD::SMIN:
14378-
case ISD::SMAX:
14379-
case ISD::FMAXNUM:
14380-
case ISD::FMINNUM:
14381-
case ISD::FMAXNUM_IEEE:
14382-
case ISD::FMINNUM_IEEE:
14383-
case ISD::FMAXIMUM:
14384-
case ISD::FMINIMUM: {
14385-
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
14386-
Vec.getOperand(0), Idx);
14387-
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
14388-
Vec.getOperand(1), Idx);
14389-
14390-
DCI.AddToWorklist(Elt0.getNode());
14391-
DCI.AddToWorklist(Elt1.getNode());
14392-
return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
14393-
}
14394-
}
14395-
}
14396-
14397-
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
14398-
if (shouldExpandVectorDynExt(N)) {
14399-
SDLoc SL(N);
14400-
SDValue Idx = N->getOperand(1);
14401-
SDValue V;
14402-
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
14403-
SDValue IC = DAG.getVectorIdxConstant(I, SL);
14404-
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
14405-
if (I == 0)
14406-
V = Elt;
14407-
else
14408-
V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
14342+
// // // Users through which we can propagate will include users of
14343+
// // // extract_element on this vector, so need to peek-through.
14344+
// // }
14345+
14346+
// // SmallVector<SDNode*, 4> UsersToModify;
14347+
14348+
// // // If the use of the BUILD_VECTOR supports source mods it can be
14349+
// // propagated. for (SDNode *U : N->users()) {
14350+
// // if(!U->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
14351+
// // if (!allUsesHaveSourceMods(U))
14352+
// // continue;
14353+
// // UsersToModify.push_back(U);
14354+
// // }
14355+
14356+
// // for(auto Node: UsersToModify) {
14357+
14358+
// // }
14359+
14360+
// return SDValue();
14361+
// }
14362+
14363+
SDValue SITargetLowering::performExtractVectorEltCombine(
14364+
SDNode * N, DAGCombinerInfo & DCI) const {
14365+
SDValue Vec = N->getOperand(0);
14366+
SelectionDAG &DAG = DCI.DAG;
14367+
14368+
EVT VecVT = Vec.getValueType();
14369+
EVT VecEltVT = VecVT.getVectorElementType();
14370+
EVT ResVT = N->getValueType(0);
14371+
14372+
unsigned VecSize = VecVT.getSizeInBits();
14373+
unsigned VecEltSize = VecEltVT.getSizeInBits();
14374+
14375+
if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
14376+
allUsesHaveSourceMods(N)) {
14377+
SDLoc SL(N);
14378+
SDValue Idx = N->getOperand(1);
14379+
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
14380+
Vec.getOperand(0), Idx);
14381+
return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
14382+
}
14383+
14384+
// ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
14385+
// =>
14386+
// Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
14387+
// Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
14388+
// ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
14389+
if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
14390+
SDLoc SL(N);
14391+
SDValue Idx = N->getOperand(1);
14392+
unsigned Opc = Vec.getOpcode();
14393+
14394+
switch (Opc) {
14395+
default:
14396+
break;
14397+
// TODO: Support other binary operations.
14398+
case ISD::FADD:
14399+
case ISD::FSUB:
14400+
case ISD::FMUL:
14401+
case ISD::ADD:
14402+
case ISD::UMIN:
14403+
case ISD::UMAX:
14404+
case ISD::SMIN:
14405+
case ISD::SMAX:
14406+
case ISD::FMAXNUM:
14407+
case ISD::FMINNUM:
14408+
case ISD::FMAXNUM_IEEE:
14409+
case ISD::FMINNUM_IEEE:
14410+
case ISD::FMAXIMUM:
14411+
case ISD::FMINIMUM: {
14412+
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
14413+
Vec.getOperand(0), Idx);
14414+
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
14415+
Vec.getOperand(1), Idx);
14416+
14417+
DCI.AddToWorklist(Elt0.getNode());
14418+
DCI.AddToWorklist(Elt1.getNode());
14419+
return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
14420+
}
14421+
}
1440914422
}
14410-
return V;
14411-
}
1441214423

14413-
if (!DCI.isBeforeLegalize())
14414-
return SDValue();
14424+
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
14425+
if (shouldExpandVectorDynExt(N)) {
14426+
SDLoc SL(N);
14427+
SDValue Idx = N->getOperand(1);
14428+
SDValue V;
14429+
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
14430+
SDValue IC = DAG.getVectorIdxConstant(I, SL);
14431+
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
14432+
if (I == 0)
14433+
V = Elt;
14434+
else
14435+
V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
14436+
}
14437+
return V;
14438+
}
1441514439

14416-
// Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
14417-
// elements. This exposes more load reduction opportunities by replacing
14418-
// multiple small extract_vector_elements with a single 32-bit extract.
14419-
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14420-
if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
14421-
VecSize > 32 && VecSize % 32 == 0 && Idx) {
14422-
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
14423-
14424-
unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
14425-
unsigned EltIdx = BitIndex / 32;
14426-
unsigned LeftoverBitIdx = BitIndex % 32;
14427-
SDLoc SL(N);
14440+
if (!DCI.isBeforeLegalize())
14441+
return SDValue();
1442814442

14429-
SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
14430-
DCI.AddToWorklist(Cast.getNode());
14443+
// Try to turn sub-dword accesses of vectors into accesses of the same
14444+
// 32-bit elements. This exposes more load reduction opportunities by
14445+
// replacing multiple small extract_vector_elements with a single 32-bit
14446+
// extract.
14447+
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14448+
if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
14449+
VecSize > 32 && VecSize % 32 == 0 && Idx) {
14450+
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
14451+
14452+
unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
14453+
unsigned EltIdx = BitIndex / 32;
14454+
unsigned LeftoverBitIdx = BitIndex % 32;
14455+
SDLoc SL(N);
1443114456

14432-
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
14433-
DAG.getConstant(EltIdx, SL, MVT::i32));
14434-
DCI.AddToWorklist(Elt.getNode());
14435-
SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
14436-
DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
14437-
DCI.AddToWorklist(Srl.getNode());
14457+
SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
14458+
DCI.AddToWorklist(Cast.getNode());
1443814459

14439-
EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
14440-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
14441-
DCI.AddToWorklist(Trunc.getNode());
14460+
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
14461+
DAG.getConstant(EltIdx, SL, MVT::i32));
14462+
DCI.AddToWorklist(Elt.getNode());
14463+
SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
14464+
DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
14465+
DCI.AddToWorklist(Srl.getNode());
1444214466

14443-
if (VecEltVT == ResVT) {
14444-
return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
14467+
EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
14468+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
14469+
DCI.AddToWorklist(Trunc.getNode());
14470+
14471+
if (VecEltVT == ResVT) {
14472+
return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
14473+
}
14474+
14475+
assert(ResVT.isScalarInteger());
14476+
return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
1444514477
}
1444614478

14447-
assert(ResVT.isScalarInteger());
14448-
return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
14479+
return SDValue();
1444914480
}
1445014481

14451-
return SDValue();
14452-
}
14453-
1445414482
SDValue
1445514483
SITargetLowering::performInsertVectorEltCombine(SDNode *N,
1445614484
DAGCombinerInfo &DCI) const {

0 commit comments

Comments
 (0)