@@ -9683,13 +9683,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
96839683 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
96849684 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
96859685
9686- // Recognize build vector patterns to emit VSX vector instructions
9687- // instead of loading value from memory.
9688- if (Subtarget.isISA3_1() && Subtarget.hasVSX()) {
9689- if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9690- return VecPat;
9691- }
9692-
96939686 if (Subtarget.hasP10Vector()) {
96949687 APInt BitMask(32, 0);
96959688 // If the value of the vector is all zeros or all ones,
@@ -9709,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
97099702 }
97109703 return SDV;
97119704 }
9705+ // Recognize build vector patterns to emit VSX vector instructions
9706+ // instead of loading value from memory.
9707+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9708+ return VecPat;
97129709 }
97139710 // Check if this is a splat of a constant value.
97149711 APInt APSplatBits, APSplatUndef;
@@ -15714,31 +15711,35 @@ using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
1571415711
1571515712static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
1571615713
15717- static const auto BaseLXVKQPatterns = []() {
15718- // LXVKQ instruction loads the Quadword value:
15719- // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15720- return std::array<std::pair<APInt, uint32_t>, 1>{
15721- {{APInt(128, 0x8000000000000000ULL) << 64, 16}}};
15722- }();
15714+ // LXVKQ instruction loads the Quadword value:
15715+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15716+ static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15717+ static const uint32_t Uim = 16;
1572315718
1572415719 // Check for direct LXVKQ match (no shift needed)
15725- for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15726- if (FullVal == BasePattern)
15727- return std::make_tuple(Uim, uint8_t{0});
15728- }
15720+ if (FullVal == BasePattern)
15721+ return std::make_tuple(Uim, uint8_t{0});
1572915722
15730- // Check if FullValue can be generated by (right) shifting a base pattern
15731- for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15732- if (BasePattern.lshr(127) == FullVal)
15733- return std::make_tuple(Uim, uint8_t{127});
15734- }
15723+ // Check if FullVal is 1 (the base pattern logically shifted right by 127)
15724+ if (FullVal == APInt(128, 1))
15725+ return std::make_tuple(Uim, uint8_t{127});
1573515726
1573615727 return std::nullopt;
1573715728}
1573815729
15739- /// Combine vector loads to a single load by recognising patterns in the Build
15740- /// Vector. LXVKQ instruction load VSX vector with a special quadword value
15741- /// based on an immediate value.
15730+ /// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15731+ /// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15732+ /// LXVKQ loads a VSX vector with a special quadword value based on an
15733+ /// immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15734+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15735+ /// This can be used to inline the build vector constants that have the
15736+ /// following patterns:
15737+ ///
15738+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15739+ /// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15740+ /// The MSB pattern can be directly loaded using LXVKQ, while the LSB pattern
15741+ /// is loaded using a combination of splatting and right shift instructions.
15742+
1574215743SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
1574315744 SelectionDAG &DAG) const {
1574415745
@@ -15810,8 +15811,8 @@ SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
1581015811 }
1581115812
1581215813 // The right shifted pattern can be constructed using a combination of
15813- // XXSPLITIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15814- // 7 bits of byte 15. This can be specified using XXSPLITIB with immediate
15814+ // XXSPLTIB and VSRQ instructions. VSRQ uses the shift amount from the lower
15815+ // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
1581515816 // value 255.
1581615817 SDValue ShiftAmountVec =
1581715818 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
0 commit comments