Skip to content

Commit 60ee515

Browse files
tonykuttaiTony Varghese
andauthored
[PowerPC] Emit lxvkq and vsrq instructions for build vector patterns (#157625)
### Optimize BUILD_VECTOR having special quadword patterns This change optimizes `BUILD_VECTOR` operations by using the `lxvkq` or `xxspltib + vsrq` instructions to inline constants matching specific 128-bit patterns: - **MSB set pattern**: `0x8000_0000_0000_0000_0000_0000_0000_0000` - **LSB set pattern**: `0x0000_0000_0000_0000_0000_0000_0000_0001` ### Implementation Details The `lxvkq` instruction loads special quadword values into VSX registers: ```asm lxvkq XT, UIM # When UIM=16: loads 0x8000_0000_0000_0000_0000_0000_0000_0000 ``` The optimization reconstructs the 128-bit register pattern from `BUILD_VECTOR` operands, accounting for target endianness. For example, the MSB pattern can be represented as: - **Big-Endian**: `<i64 -9223372036854775808, i64 0>` - **Little-Endian**: `<i64 0, i64 -9223372036854775808>` Both produce the same register value: `0x8000_0000_0000_0000_0000_0000_0000_0000` ### MSB Pattern (`0x8000...0000`) All vector types (`v2i64`, `v4i32`, `v8i16`, `v16i8`) generate: ```asm lxvkq v2, 16 ``` ### LSB Pattern (`0x0000...0001`) All vector types generate: ```asm xxspltib v2, 255 vsrq v2, v2, v2 ``` --------- Co-authored-by: Tony Varghese <[email protected]>
1 parent 856555b commit 60ee515

File tree

4 files changed

+458
-9
lines changed

4 files changed

+458
-9
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
97029702
}
97039703
return SDV;
97049704
}
9705+
// Recognize build vector patterns to emit VSX vector instructions
9706+
// instead of loading value from memory.
9707+
if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9708+
return VecPat;
97059709
}
97069710
// Check if this is a splat of a constant value.
97079711
APInt APSplatBits, APSplatUndef;
@@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N,
1569615700
return SDValue();
1569715701
}
1569815702

15703+
// The LXVKQ instruction loads a VSX vector with a special quadword value
15704+
// based on an immediate value. This helper method returns the details of the
15705+
// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15706+
// to help generate the LXVKQ instruction and the subsequent shift instruction
15707+
// required to match the original build vector pattern.
15708+
15709+
// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15710+
using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15711+
15712+
static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15713+
15714+
// With UIM = 0b10000 (16), the LXVKQ instruction loads the quadword value
15715+
// 0x8000_0000_0000_0000_0000_0000_0000_0000 (only the most significant bit set).
15716+
static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15717+
static const uint32_t Uim = 16;
15718+
15719+
// Direct LXVKQ match: FullVal equals the base pattern, so no shift is needed.
15720+
if (FullVal == BasePattern)
15721+
return std::make_tuple(Uim, uint8_t{0});
15722+
15723+
// FullVal is 1, i.e. the base pattern logically shifted right by 127 bits.
15724+
if (FullVal == APInt(128, 1))
15725+
return std::make_tuple(Uim, uint8_t{127});
15726+
15727+
return std::nullopt;
15728+
}
15729+
15730+
/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15731+
/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15732+
/// The LXVKQ instruction loads a VSX vector with a special quadword value based
15733+
/// on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15734+
/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15735+
/// This can be used to inline the build vector constants that have the
15736+
/// following patterns:
15737+
///
15738+
/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15739+
/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15740+
/// The MSB pattern can be directly loaded using LXVKQ, while the LSB pattern is
15741+
/// loaded using a combination of splatting and right shift instructions.
15742+
15743+
SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15744+
SelectionDAG &DAG) const {
15745+
15746+
assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15747+
"Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15748+
15749+
// This transformation is only supported for build vectors whose elements are
15750+
// bytes, halfwords, words, or doublewords; any other type is left untouched.
15751+
EVT VT = Op.getValueType();
15752+
if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15753+
VT == MVT::v2i64))
15754+
return SDValue();
15755+
15756+
LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15757+
<< VT.getEVTString() << "): ";
15758+
Op->dump());
15759+
15760+
unsigned NumElems = VT.getVectorNumElements();
15761+
unsigned ElemBits = VT.getScalarSizeInBits();
15762+
15763+
bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15764+
15765+
// Bail out if any operand of the build vector is not a ConstantSDNode.
15766+
for (const SDValue &Operand : Op.getNode()->op_values()) {
15767+
if (!isa<ConstantSDNode>(Operand))
15768+
return SDValue();
15769+
}
15770+
15771+
// Assemble the build vector operands into a single 128-bit register value.
15772+
// We need to reconstruct what the 128-bit register pattern would be
15773+
// that produces this vector when interpreted with the current endianness.
15774+
APInt FullVal = APInt::getZero(128);
15775+
15776+
for (unsigned Index = 0; Index < NumElems; ++Index) {
15777+
auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15778+
15779+
// Get the element value as raw bits (zero-extended).
15780+
uint64_t ElemValue = C->getZExtValue();
15781+
15782+
// Mask to the element size so that only the low ElemBits bits contribute.
15783+
if (ElemBits < 64)
15784+
ElemValue &= ((1ULL << ElemBits) - 1);
15785+
15786+
// Bit position of this element: little-endian counts up from bit 0, big-endian counts down from bit 127.
15787+
unsigned BitPos =
15788+
(IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15789+
15790+
// Widen the element value to 128 bits and shift it to its bit position.
15791+
APInt ElemAPInt(128, ElemValue);
15792+
ElemAPInt <<= BitPos;
15793+
15794+
// Merge the positioned element into the accumulated register value.
15795+
FullVal |= ElemAPInt;
15796+
}
15797+
15798+
if (FullVal.isZero() || FullVal.isAllOnes())
15799+
return SDValue();
15800+
15801+
if (auto UIMOpt = getPatternInfo(FullVal)) {
15802+
const auto &[Uim, ShiftAmount] = *UIMOpt;
15803+
SDLoc Dl(Op);
15804+
15805+
// A zero shift amount means the value is loaded by a single LXVKQ.
15806+
if (ShiftAmount == 0) {
15807+
SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15808+
SDValue LxvkqInstr =
15809+
SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15810+
LLVM_DEBUG(llvm::dbgs()
15811+
<< "combineBVLoadsSpecialValue: Instruction Emitted ";
15812+
LxvkqInstr.dump());
15813+
return LxvkqInstr;
15814+
}
15815+
15816+
assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
15817+
15818+
// The LSB pattern is built with XXSPLTIB followed by VSRQ. XXSPLTIB 255 gives
15819+
// an all-ones vector that serves as both the VSRQ source and shift amount:
15820+
// VSRQ uses the shift amount from the lower 7 bits of byte 15 (127 here),
15821+
// and an all-ones quadword shifted right by 127 leaves the value 1.
15822+
SDValue ShiftAmountVec =
15823+
SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15824+
DAG.getTargetConstant(255, Dl, MVT::i32)),
15825+
0);
15826+
// Emit the quadword right shift with the splat as both of its operands.
15827+
SDValue ShiftVec = SDValue(
15828+
DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15829+
0);
15830+
LLVM_DEBUG(llvm::dbgs()
15831+
<< "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15832+
ShiftVec.dump());
15833+
return ShiftVec;
15834+
}
15835+
// The build vector matched none of the special quadword patterns.
15836+
return SDValue();
15837+
}
15838+
1569915839
/// Reduce the number of loads when building a vector.
1570015840
///
1570115841
/// Building a vector out of multiple loads can be converted to a load

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,6 +1472,9 @@ namespace llvm {
14721472
combineElementTruncationToVectorTruncation(SDNode *N,
14731473
DAGCombinerInfo &DCI) const;
14741474

1475+
SDValue combineBVLoadsSpecialValue(SDValue Operand,
1476+
SelectionDAG &DAG) const;
1477+
14751478
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
14761479
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
14771480
/// essentially any shuffle of v8i16 vectors that just inserts one element

0 commit comments

Comments
 (0)