Skip to content

Conversation

@zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Aug 20, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 20, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Patch is 43.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154533.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+53)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+21-1)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+132-542)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+9-51)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5b2d185594f44..de2a27143c389 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2434,6 +2434,7 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+  MVT VT = Node->getSimpleValueType(0);
   EVT ResTy = Op->getValueType(0);
   unsigned NumElts = ResTy.getVectorNumElements();
   SDLoc DL(Op);
@@ -2517,6 +2518,56 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   }
 
   if (!IsConstant) {
+    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
+    // the sub-sequence of the vector and then broadcast the sub-sequence.
+    SmallVector<SDValue> Sequence;
+    BitVector UndefElements;
+    if (Node->getRepeatedSequence(Sequence, &UndefElements)) {
+      // TODO: If the BUILD_VECTOR contains undef elements, consider falling
+      // back to use INSERT_VECTOR_ELT to materialize the vector, because it
+      // generates worse code in some cases. This could be further optimized
+      // with more consideration.
+      if (UndefElements.count() == 0) {
+        unsigned SeqLen = Sequence.size();
+
+        SDValue Op0 = Sequence[0];
+        SDValue Vector = DAG.getUNDEF(ResTy);
+        if (!Op0.isUndef())
+          Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
+        for (unsigned i = 1; i < SeqLen; ++i) {
+          SDValue Opi = Sequence[i];
+          if (Opi.isUndef())
+            continue;
+          Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
+                               DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+        }
+
+        unsigned SplatLen = NumElts / SeqLen;
+        MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
+        MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
+
+        // If size of the sub-sequence is half of a 256-bits vector, bitcast the
+        // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
+        if (SplatEltTy == MVT::i128)
+          SplatTy = MVT::v4i64;
+
+        SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
+        SDValue SplatVec;
+        if (SplatTy.is256BitVector()) {
+          SplatVec =
+              DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
+                                                    : LoongArchISD::XVREPLVE0,
+                          DL, SplatTy, SrcVec);
+        } else {
+          SplatVec =
+              DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
+                          DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+        }
+
+        return DAG.getBitcast(ResTy, SplatVec);
+      }
+    }
+
     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
     // The resulting code is the same length as the expansion, but it doesn't
     // use memory operations.
@@ -6637,6 +6688,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(VREPLVEI)
     NODE_NAME_CASE(VREPLGR2VR)
     NODE_NAME_CASE(XVPERMI)
+    NODE_NAME_CASE(XVREPLVE0)
+    NODE_NAME_CASE(XVREPLVE0Q)
     NODE_NAME_CASE(VPICK_SEXT_ELT)
     NODE_NAME_CASE(VPICK_ZEXT_ELT)
     NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index f79ba7450cc36..9ab867a918f4e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -141,6 +141,8 @@ enum NodeType : unsigned {
   VREPLVEI,
   VREPLGR2VR,
   XVPERMI,
+  XVREPLVE0,
+  XVREPLVE0Q,
 
   // Extended vector element extraction
   VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 0696b11d62ac9..962448fcb470d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -10,8 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_LoongArchXVREPLVE0 : SDTypeProfile<1, 1, [SDTCisVec<0>,
+                                                  SDTCisSameAs<0, 1>]>;
+
 // Target nodes.
 def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
+def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
+def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
 def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
 def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
 def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1852,11 +1857,26 @@ def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8: $ui8),
 def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8: $ui8),
           (XVPERMI_D v4f64:$xj, immZExt8: $ui8)>;
 
-// XVREPLVE0_{W/D}
+// XVREPLVE0_{B/H/W/D/Q}
+def : Pat<(loongarch_xvreplve0 v32i8:$xj),
+          (XVREPLVE0_B v32i8:$xj)>;
+def : Pat<(loongarch_xvreplve0 v16i16:$xj),
+          (XVREPLVE0_H v16i16:$xj)>;
+def : Pat<(loongarch_xvreplve0 v8i32:$xj),
+          (XVREPLVE0_W v8i32:$xj)>;
+def : Pat<(loongarch_xvreplve0 v4i64:$xj),
+          (XVREPLVE0_D v4i64:$xj)>;
+def : Pat<(loongarch_xvreplve0 v8f32:$xj),
+          (XVREPLVE0_W v8f32:$xj)>;
+def : Pat<(loongarch_xvreplve0 v4f64:$xj),
+          (XVREPLVE0_D v4f64:$xj)>;
 def : Pat<(lasxsplatf32 FPR32:$fj),
           (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;
 def : Pat<(lasxsplatf64 FPR64:$fj),
           (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+  def : Pat<(vt (loongarch_xvreplve0q LASX256:$xj)),
+            (XVREPLVE0_Q LASX256:$xj)>;
 
 // VSTELM
 defm : VstelmPat<truncstorei8, v32i8, XVSTELM_B, simm8, uimm5>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 44803e7078c45..5b1242e29cd60 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -601,100 +601,53 @@ define void @buildvector_v32i8_subseq_2(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3
 ; CHECK-NEXT:    ld.b $t6, $sp, 16
 ; CHECK-NEXT:    ld.b $t7, $sp, 8
 ; CHECK-NEXT:    ld.b $t8, $sp, 0
-; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
-; CHECK-NEXT:    xvori.b $xr3, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr3, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr2, $a3
-; CHECK-NEXT:    xvextrins.b $xr0, $xr3, 17
-; CHECK-NEXT:    xvori.b $xr4, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr4, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr3, $a4
-; CHECK-NEXT:    xvextrins.b $xr0, $xr4, 34
-; CHECK-NEXT:    xvori.b $xr5, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr4, $a5
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 51
-; CHECK-NEXT:    xvori.b $xr6, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr6, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr5, $a6
-; CHECK-NEXT:    xvextrins.b $xr0, $xr6, 68
-; CHECK-NEXT:    xvori.b $xr7, $xr5, 0
-; CHECK-NEXT:    xvpermi.q $xr7, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr6, $a7
-; CHECK-NEXT:    xvextrins.b $xr0, $xr7, 85
-; CHECK-NEXT:    xvori.b $xr8, $xr6, 0
-; CHECK-NEXT:    xvpermi.q $xr8, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr7, $t8
-; CHECK-NEXT:    xvextrins.b $xr0, $xr8, 102
-; CHECK-NEXT:    xvori.b $xr9, $xr7, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr8, $t7
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 119
-; CHECK-NEXT:    xvori.b $xr10, $xr8, 0
-; CHECK-NEXT:    xvpermi.q $xr10, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr9, $t6
-; CHECK-NEXT:    xvextrins.b $xr0, $xr10, 136
-; CHECK-NEXT:    xvori.b $xr11, $xr9, 0
-; CHECK-NEXT:    xvpermi.q $xr11, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr10, $t5
-; CHECK-NEXT:    xvextrins.b $xr0, $xr11, 153
-; CHECK-NEXT:    xvori.b $xr12, $xr10, 0
-; CHECK-NEXT:    xvpermi.q $xr12, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr11, $t4
-; CHECK-NEXT:    xvextrins.b $xr0, $xr12, 170
-; CHECK-NEXT:    xvori.b $xr13, $xr11, 0
-; CHECK-NEXT:    xvpermi.q $xr13, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr12, $t3
-; CHECK-NEXT:    xvextrins.b $xr0, $xr13, 187
-; CHECK-NEXT:    xvori.b $xr14, $xr12, 0
-; CHECK-NEXT:    xvpermi.q $xr14, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr13, $t2
-; CHECK-NEXT:    xvextrins.b $xr0, $xr14, 204
-; CHECK-NEXT:    xvori.b $xr15, $xr13, 0
-; CHECK-NEXT:    xvpermi.q $xr15, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr14, $t1
-; CHECK-NEXT:    xvextrins.b $xr0, $xr15, 221
-; CHECK-NEXT:    xvori.b $xr16, $xr14, 0
-; CHECK-NEXT:    xvpermi.q $xr16, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr15, $t0
-; CHECK-NEXT:    xvextrins.b $xr0, $xr16, 238
-; CHECK-NEXT:    xvori.b $xr16, $xr15, 0
-; CHECK-NEXT:    xvpermi.q $xr16, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr16, 255
-; CHECK-NEXT:    xvreplgr2vr.b $xr16, $a1
-; CHECK-NEXT:    xvpermi.q $xr16, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr16, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 48
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
 ; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 17
-; CHECK-NEXT:    xvpermi.q $xr2, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr2, 34
-; CHECK-NEXT:    xvpermi.q $xr3, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr3, 51
-; CHECK-NEXT:    xvpermi.q $xr4, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr4, 68
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 85
-; CHECK-NEXT:    xvpermi.q $xr6, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr6, 102
-; CHECK-NEXT:    xvpermi.q $xr7, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr7, 119
-; CHECK-NEXT:    xvpermi.q $xr8, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr8, 136
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 153
-; CHECK-NEXT:    xvpermi.q $xr10, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr10, 170
-; CHECK-NEXT:    xvpermi.q $xr11, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr11, 187
-; CHECK-NEXT:    xvpermi.q $xr12, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr12, 204
-; CHECK-NEXT:    xvpermi.q $xr13, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr13, 221
-; CHECK-NEXT:    xvpermi.q $xr14, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr14, 238
-; CHECK-NEXT:    xvpermi.q $xr15, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr15, 255
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a3
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 34
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a4
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 51
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a5
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 68
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a6
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 85
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a7
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 102
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t8
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 119
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t7
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 136
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t6
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 153
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t5
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 170
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t4
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 187
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t3
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 204
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t2
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 221
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t1
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 238
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 255
+; CHECK-NEXT:    xvreplve0.q $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -738,100 +691,29 @@ define void @buildvector_v32i8_subseq_4(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3
 ; CHECK-LABEL: buildvector_v32i8_subseq_4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.b $t0, $sp, 0
-; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
-; CHECK-NEXT:    xvori.b $xr3, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr3, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr2, $a3
-; CHECK-NEXT:    xvextrins.b $xr0, $xr3, 17
-; CHECK-NEXT:    xvori.b $xr4, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr4, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr3, $a4
-; CHECK-NEXT:    xvextrins.b $xr0, $xr4, 34
-; CHECK-NEXT:    xvori.b $xr5, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr4, $a5
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 51
-; CHECK-NEXT:    xvori.b $xr6, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr6, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr5, $a6
-; CHECK-NEXT:    xvextrins.b $xr0, $xr6, 68
-; CHECK-NEXT:    xvori.b $xr7, $xr5, 0
-; CHECK-NEXT:    xvpermi.q $xr7, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr6, $a7
-; CHECK-NEXT:    xvextrins.b $xr0, $xr7, 85
-; CHECK-NEXT:    xvori.b $xr8, $xr6, 0
-; CHECK-NEXT:    xvpermi.q $xr8, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr7, $t0
-; CHECK-NEXT:    xvextrins.b $xr0, $xr8, 102
-; CHECK-NEXT:    xvori.b $xr9, $xr7, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr8, $a1
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 119
-; CHECK-NEXT:    xvori.b $xr9, $xr8, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 136
-; CHECK-NEXT:    xvori.b $xr9, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 153
-; CHECK-NEXT:    xvori.b $xr9, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 170
-; CHECK-NEXT:    xvori.b $xr9, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 187
-; CHECK-NEXT:    xvori.b $xr9, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 204
-; CHECK-NEXT:    xvori.b $xr9, $xr5, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 221
-; CHECK-NEXT:    xvori.b $xr9, $xr6, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 238
-; CHECK-NEXT:    xvori.b $xr9, $xr7, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 255
-; CHECK-NEXT:    xvori.b $xr9, $xr8, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 0
-; CHECK-NEXT:    xvori.b $xr9, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 17
-; CHECK-NEXT:    xvori.b $xr9, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 34
-; CHECK-NEXT:    xvori.b $xr9, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 51
-; CHECK-NEXT:    xvori.b $xr9, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 68
-; CHECK-NEXT:    xvori.b $xr9, $xr5, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 85
-; CHECK-NEXT:    xvori.b $xr9, $xr6, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 102
-; CHECK-NEXT:    xvori.b $xr9, $xr7, 0
-; CHECK-NEXT:    xvpermi.q $xr9, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr9, 119
-; CHECK-NEXT:    xvpermi.q $xr8, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr8, 136
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 153
-; CHECK-NEXT:    xvpermi.q $xr2, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr2, 170
-; CHECK-NEXT:    xvpermi.q $xr3, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr3, 187
-; CHECK-NEXT:    xvpermi.q $xr4, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr4, 204
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 221
-; CHECK-NEXT:    xvpermi.q $xr6, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr6, 238
-; CHECK-NEXT:    xvpermi.q $xr7, $xr0, 48
-; CHECK-NEXT:    xvextrins.b $xr0, $xr7, 255
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 17
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a3
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 34
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a4
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 51
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a5
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 68
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a6
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 85
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a7
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 102
+; CHECK-NEXT:    xvreplgr2vr.b $xr1, $t0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 119
+; CHECK-NEXT:    xvreplve0.d $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -874,100 +756,17 @@ entry:
 define void @buildvector_v32i8_subseq_8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3) nounwind {
 ; CHECK-LABEL: buildvector_v32i8_subseq_8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
-; CHECK-NEXT:    xvori.b $xr3, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr3, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr2, $a3
-; CHECK-NEXT:    xvextrins.b $xr0, $xr3, 17
-; CHECK-NEXT:    xvori.b $xr4, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr4, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr3, $a4
-; CHECK-NEXT:    xvextrins.b $xr0, $xr4, 34
-; CHECK-NEXT:    xvori.b $xr5, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvreplgr2vr.b $xr4, $a1
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 51
-; CHECK-NEXT:    xvori.b $xr5, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 68
-; CHECK-NEXT:    xvori.b $xr5, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 85
-; CHECK-NEXT:    xvori.b $xr5, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 102
-; CHECK-NEXT:    xvori.b $xr5, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 119
-; CHECK-NEXT:    xvori.b $xr5, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 136
-; CHECK-NEXT:    xvori.b $xr5, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 153
-; CHECK-NEXT:    xvori.b $xr5, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 170
-; CHECK-NEXT:    xvori.b $xr5, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 187
-; CHECK-NEXT:    xvori.b $xr5, $xr4, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 204
-; CHECK-NEXT:    xvori.b $xr5, $xr1, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 221
-; CHECK-NEXT:    xvori.b $xr5, $xr2, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 238
-; CHECK-NEXT:    xvori.b $xr5, $xr3, 0
-; CHECK-NEXT:    xvpermi.q $xr5, $xr0, 18
-; CHECK-NEXT:    xvextrins.b $xr0, $xr5, 255
-; CHECK-NEXT:    xv...
[truncated]

@tangaac
Copy link
Member

tangaac commented Aug 21, 2025

We may not need XVREPLVE0 and XVREPLVE0Q.
XVREPLVE0 could be replaced with vectorshuffle %v, poison, splat 0.
XVREPLVE0Q could be replaced with xvpermi.q xr0, xr0, 2.

@zhaoqi5
Copy link
Contributor Author

zhaoqi5 commented Aug 21, 2025

> We may not need XVREPLVE0 and XVREPLVE0Q. XVREPLVE0 could be replaced with vectorshuffle %v, poison, splat 0. XVREPLVE0Q could be replaced with xvpermi.q xr0, xr0, 2.

I have already tried using vectorshuffle directly, but it gets custom-lowered to xvpermi + xvrepl128vei because of the canonicalization performed when lowering a 256-bit vector_shuffle.

To use vectorshuffle here, the vector_shuffle canonicalization would have to be modified, and a new lowering pattern that matches this case would probably need to be added.

@zhaoqi5 zhaoqi5 requested review from SixWeining and tangaac August 21, 2025 04:01
Base automatically changed from users/zhaoqi5/test-repeated-buildvector to main August 28, 2025 08:28
@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/broadcast-repeated-buildvector branch 2 times, most recently from ca08e88 to 0e3aa0d Compare September 2, 2025 02:21
Comment on lines +85 to +89
def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
def immZExt4 : ImmLeaf<GRLenVT, [{return isUInt<4>(Imm);}]>;
def immZExt8 : ImmLeaf<GRLenVT, [{return isUInt<8>(Imm);}]>;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests for la32 passed.

@zhaoqi5 zhaoqi5 requested a review from heiher September 16, 2025 12:22
@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/broadcast-repeated-buildvector branch from 0e3aa0d to f028d60 Compare September 17, 2025 10:12
@zhaoqi5
Copy link
Contributor Author

zhaoqi5 commented Sep 17, 2025

ping

@zhaoqi5 zhaoqi5 merged commit add9079 into main Sep 22, 2025
9 checks passed
@zhaoqi5 zhaoqi5 deleted the users/zhaoqi5/broadcast-repeated-buildvector branch September 22, 2025 08:04
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 22, 2025

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/24566

Here is the relevant piece of the build log for reference:
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: commands/memory/write/TestMemoryWrite.py (194 of 2322)
PASS: lldb-api :: commands/platform/file/close/TestPlatformFileClose.py (195 of 2322)
PASS: lldb-api :: commands/platform/file/read/TestPlatformFileRead.py (196 of 2322)
PASS: lldb-api :: commands/memory/read/TestMemoryRead.py (197 of 2322)
PASS: lldb-api :: commands/platform/connect/TestPlatformConnect.py (198 of 2322)
UNSUPPORTED: lldb-api :: commands/platform/sdk/TestPlatformSDK.py (199 of 2322)
PASS: lldb-api :: commands/plugin/TestPlugin.py (200 of 2322)
PASS: lldb-api :: commands/platform/process/launch/TestPlatformProcessLaunch.py (201 of 2322)
PASS: lldb-api :: commands/platform/process/list/TestProcessList.py (202 of 2322)
UNRESOLVED: lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py (203 of 2322)
******************** TEST 'lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --cmake-build-type Release /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads -p TestGuiSpawnThreads.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision add9079dcf0f0a34e03e5453e754d8fc40116785)
  clang revision add9079dcf0f0a34e03e5453e754d8fc40116785
  llvm revision add9079dcf0f0a34e03e5453e754d8fc40116785
Skipping the following test categories: ['libc++', 'msvcstl', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
FAIL: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
======================================================================
ERROR: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/decorators.py", line 155, in wrapper
    return func(*args, **kwargs)
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads/TestGuiSpawnThreads.py", line 44, in test_gui
    self.child.expect_exact(f"thread #{i + 2}: tid =")
  File "/usr/local/lib/python3.10/dist-packages/pexpect/spawnbase.py", line 432, in expect_exact
    return exp.expect_loop(timeout)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 179, in expect_loop
    return self.eof(e)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 122, in eof
    raise exc
pexpect.exceptions.EOF: End Of File (EOF). Exception style platform.
<pexpect.pty_spawn.spawn object at 0xf16a9ce71ab0>
command: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb
args: ['/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb', '--no-lldbinit', '--no-use-colors', '-O', 'settings clear --all', '-O', 'settings set symbols.enable-external-lookup false', '-O', 'settings set target.inherit-tcc true', '-O', 'settings set target.disable-aslr false', '-O', 'settings set target.detach-on-error false', '-O', 'settings set target.auto-apply-fixits false', '-O', 'settings set plugin.process.gdb-remote.packet-timeout 60', '-O', 'settings set symbols.clang-modules-cache-path "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api"', '-O', 'settings set use-color false', '-O', 'settings set show-statusline false', '--file', '/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/commands/gui/spawn-threads/TestGuiSpawnThreads.test_gui/a.out']
buffer (last 100 chars): b''
before (last 100 chars): b'2 0x0000ca1d01e24b30 _start (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb+0x44b30)\n'
after: <class 'pexpect.exceptions.EOF'>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants