Commit 7d89994

[PowerPC] Use rldimi/rlwimi to optimize build_vector
Leverage rldimi/rlwimi instructions to generate better code for BUILD_VECTOR:

- For v16i8, four groups of (i8 << 24) | (i8 << 16) | (i8 << 8) | i8 to construct a vector.
- For v8i16, four groups of (i16 << 16) | i16 to construct a vector.

We already have patterns for v4i32 and v2i64 construction.
1 parent cc62782 · commit 7d89994

14 files changed: +4652 -5579 lines
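For illustration only, the word assembly described in the commit message corresponds to the following scalar shift/OR packing. This is a standalone sketch with hypothetical helper names (packBytes, packHalves, packWords), not code from the commit; on PowerPC, each of these insert-at-offset steps is the kind of operation rlwimi/rldimi can perform in a single instruction.

    #include <cstdint>

    // Four i8 lanes into one 32-bit word: (b3 << 24) | (b2 << 16) | (b1 << 8) | b0.
    uint32_t packBytes(uint8_t b3, uint8_t b2, uint8_t b1, uint8_t b0) {
      return (uint32_t(b3) << 24) | (uint32_t(b2) << 16) | (uint32_t(b1) << 8) | b0;
    }

    // Two i16 lanes into one 32-bit word: (h1 << 16) | h0.
    uint32_t packHalves(uint16_t h1, uint16_t h0) {
      return (uint32_t(h1) << 16) | h0;
    }

    // Two i32 words into one 64-bit doubleword: (w1 << 32) | w0.
    uint64_t packWords(uint32_t w1, uint32_t w0) {
      return (uint64_t(w1) << 32) | w0;
    }

A v16i8 BUILD_VECTOR thus reduces to four packBytes-style words, a v8i16 to four packHalves-style words, and the resulting i32/i64 values feed the existing v4i32/v2i64 patterns.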

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 50 additions & 0 deletions
@@ -9276,6 +9276,49 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
   return (!LosesInfo && !APFloatToConvert.isDenormal());
 }
 
+// Use rldimi/rlwimi to construct vectors:
+// i32 = (i8 << 24) | (i8 << 16) | (i8 << 8) | i8
+// i32 = (i16 << 16) | i16
+// i64 = (i32 << 32) | i32
+// And put two i64 together to get a vector.
+static SDValue tryMaskInsertVector(SDValue Op, SelectionDAG &DAG,
+                                   bool LittleEndian) {
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+
+  // There are already patterns for v4i32 and v2i64 construction.
+  if (VT == MVT::v16i8 || VT == MVT::v8i16) {
+    int NumElt = VT.getVectorNumElements();
+    int ScalarSize = VT.getScalarSizeInBits();
+    int EltsFor32 = NumElt / 4;
+    SDValue NewVecElts[4];
+    SDValue Parts[4];
+    for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < EltsFor32; ++j) {
+        SDValue Elt = LittleEndian
+                          ? Op.getOperand(i * EltsFor32 + EltsFor32 - j - 1)
+                          : Op.getOperand(i * EltsFor32 + j);
+        Parts[j] = DAG.getZExtOrTrunc(Elt, dl, MVT::i32);
+
+        // Left-shift elements to insert, except the last, because offset is 0.
+        if (j != EltsFor32 - 1)
+          Parts[j] =
+              DAG.getNode(ISD::SHL, dl, MVT::i32, Parts[j],
+                          DAG.getTargetConstant(
+                              ScalarSize * (EltsFor32 - j - 1), dl, MVT::i32));
+        if (j > 0)
+          Parts[j] = DAG.getNode(ISD::OR, dl, MVT::i32, Parts[j - 1], Parts[j]);
+      }
+      NewVecElts[i] = Parts[EltsFor32 - 1];
+    }
+
+    // Count on v4i32 to get optimized BUILD_VECTOR pattern.
+    return DAG.getBitcast(VT, DAG.getBuildVector(MVT::v4i32, dl, NewVecElts));
+  }
+
+  return SDValue();
+}
+
 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                              unsigned &Opcode) {
   LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
@@ -9457,6 +9500,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                         Subtarget.hasP8Vector()))
       return Op;
+
+    // Try to construct vector using masked insert.
+    if (!BVN->isConstant() && !DAG.isSplatValue(Op, true))
+      if (SDValue Res =
+              tryMaskInsertVector(Op, DAG, Subtarget.isLittleEndian()))
+        return Res;
+
     return SDValue();
   }
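For reference, here is a minimal standalone sanity check of the little-endian element ordering used in tryMaskInsertVector above, mirroring its index and shift math on plain integers. This is an illustration outside of LLVM, assuming a little-endian host; it is not part of the commit.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Stand-in for a v16i8 BUILD_VECTOR's operands, element 0 first.
      uint8_t Elems[16];
      for (int i = 0; i < 16; ++i)
        Elems[i] = uint8_t(0x10 + i);

      const int EltsFor32 = 4; // 16 elements / 4 words, as in the patch
      uint32_t Words[4];
      for (int i = 0; i < 4; ++i) {
        uint32_t W = 0;
        for (int j = 0; j < EltsFor32; ++j) {
          // Little-endian operand choice from the patch:
          //   Op.getOperand(i * EltsFor32 + EltsFor32 - j - 1)
          uint32_t Part = Elems[i * EltsFor32 + EltsFor32 - j - 1];
          if (j != EltsFor32 - 1)
            Part <<= 8 * (EltsFor32 - j - 1); // ScalarSize * (EltsFor32 - j - 1)
          W |= Part;
        }
        Words[i] = W;
      }

      // On a little-endian host, each packed word matches the raw bytes of the
      // corresponding 4-element group, i.e. the vector's memory image.
      uint32_t FromMemory[4];
      std::memcpy(FromMemory, Elems, sizeof(FromMemory));
      for (int i = 0; i < 4; ++i)
        assert(Words[i] == FromMemory[i]);
      return 0;
    }

The big-endian path simply takes the operands in order (i * EltsFor32 + j), so element i * EltsFor32 + 0 lands in the most significant byte of each word.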
