@@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
97029702      }
97039703      return SDV;
97049704    }
9705+     // Recognize build vector patterns to emit VSX vector instructions
9706+     // instead of loading value from memory.
9707+     if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9708+       return VecPat;
97059709  }
97069710  // Check if this is a splat of a constant value.
97079711  APInt APSplatBits, APSplatUndef;
@@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N,
1569615700  return SDValue();
1569715701}
1569815702
15703+ // LXVKQ instruction load VSX vector with a special quadword value
15704+ // based on an immediate value. This helper method returns the details of the
15705+ // match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15706+ // to help generate the LXVKQ instruction and the subsequent shift instruction
15707+ // required to match the original build vector pattern.
15708+ 
15709+ // LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15710+ using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15711+ 
15712+ static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15713+ 
15714+   // LXVKQ instruction loads the Quadword value:
15715+   // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15716+   static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15717+   static const uint32_t Uim = 16;
15718+ 
15719+   // Check for direct LXVKQ match (no shift needed)
15720+   if (FullVal == BasePattern)
15721+     return std::make_tuple(Uim, uint8_t{0});
15722+ 
15723+   // Check if FullValue is 1 (the result of the base pattern >> 127)
15724+   if (FullVal == APInt(128, 1))
15725+     return std::make_tuple(Uim, uint8_t{127});
15726+ 
15727+   return std::nullopt;
15728+ }
15729+ 
15730+ /// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15731+ /// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15732+ /// LXVKQ instruction load VSX vector with a special quadword value based on an
15733+ /// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15734+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15735+ /// This can be used to inline the build vector constants that have the
15736+ /// following patterns:
15737+ ///
15738+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15739+ /// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15740+ /// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
15741+ /// combination of splatting and right shift instructions.
15742+ 
15743+ SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15744+                                                       SelectionDAG &DAG) const {
15745+ 
15746+   assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15747+          "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15748+ 
15749+   // This transformation is only supported if we are loading either a byte,
15750+   // halfword, word, or doubleword.
15751+   EVT VT = Op.getValueType();
15752+   if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15753+         VT == MVT::v2i64))
15754+     return SDValue();
15755+ 
15756+   LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15757+                           << VT.getEVTString() << "): ";
15758+              Op->dump());
15759+ 
15760+   unsigned NumElems = VT.getVectorNumElements();
15761+   unsigned ElemBits = VT.getScalarSizeInBits();
15762+ 
15763+   bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15764+ 
15765+   // Check for Non-constant operand in the build vector.
15766+   for (const SDValue &Operand : Op.getNode()->op_values()) {
15767+     if (!isa<ConstantSDNode>(Operand))
15768+       return SDValue();
15769+   }
15770+ 
15771+   // Assemble build vector operands as a 128-bit register value
15772+   // We need to reconstruct what the 128-bit register pattern would be
15773+   // that produces this vector when interpreted with the current endianness
15774+   APInt FullVal = APInt::getZero(128);
15775+ 
15776+   for (unsigned Index = 0; Index < NumElems; ++Index) {
15777+     auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15778+ 
15779+     // Get element value as raw bits (zero-extended)
15780+     uint64_t ElemValue = C->getZExtValue();
15781+ 
15782+     // Mask to element size to ensure we only get the relevant bits
15783+     if (ElemBits < 64)
15784+       ElemValue &= ((1ULL << ElemBits) - 1);
15785+ 
15786+     // Calculate bit position for this element in the 128-bit register
15787+     unsigned BitPos =
15788+         (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15789+ 
15790+     // Create APInt for the element value and shift it to correct position
15791+     APInt ElemAPInt(128, ElemValue);
15792+     ElemAPInt <<= BitPos;
15793+ 
15794+     // Place the element value at the correct bit position
15795+     FullVal |= ElemAPInt;
15796+   }
15797+ 
15798+   if (FullVal.isZero() || FullVal.isAllOnes())
15799+     return SDValue();
15800+ 
15801+   if (auto UIMOpt = getPatternInfo(FullVal)) {
15802+     const auto &[Uim, ShiftAmount] = *UIMOpt;
15803+     SDLoc Dl(Op);
15804+ 
15805+     // Generate LXVKQ instruction if the shift amount is zero.
15806+     if (ShiftAmount == 0) {
15807+       SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15808+       SDValue LxvkqInstr =
15809+           SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15810+       LLVM_DEBUG(llvm::dbgs()
15811+                      << "combineBVLoadsSpecialValue: Instruction Emitted ";
15812+                  LxvkqInstr.dump());
15813+       return LxvkqInstr;
15814+     }
15815+ 
15816+     assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
15817+ 
15818+     // The right shifted pattern can be constructed using a combination of
15819+     // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15820+     // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
15821+     // value 255.
15822+     SDValue ShiftAmountVec =
15823+         SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15824+                                    DAG.getTargetConstant(255, Dl, MVT::i32)),
15825+                 0);
15826+     // Generate appropriate right shift instruction
15827+     SDValue ShiftVec = SDValue(
15828+         DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15829+         0);
15830+     LLVM_DEBUG(llvm::dbgs()
15831+                    << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15832+                ShiftVec.dump());
15833+     return ShiftVec;
15834+   }
15835+   // No patterns matched for build vectors.
15836+   return SDValue();
15837+ }
15838+ 
1569915839/// Reduce the number of loads when building a vector.
1570015840///
1570115841/// Building a vector out of multiple loads can be converted to a load
0 commit comments