Skip to content

Conversation

@llvmbot
Copy link
Member

@llvmbot llvmbot commented Aug 18, 2025

Backport 23b3203

Requested by: @amy-kwan

@llvmbot
Copy link
Member Author

llvmbot commented Aug 18, 2025

@diggerlin What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Member Author

llvmbot commented Aug 18, 2025

@llvm/pr-subscribers-backend-powerpc

Author: None (llvmbot)

Changes

Backport 23b3203

Requested by: @amy-kwan


Full diff: https://github.com/llvm/llvm-project/pull/154138.diff

2 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+10-5)
  • (modified) llvm/test/CodeGen/PowerPC/mtvsrbmi.ll (+76-11)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f179873b4dbd2..67f59ed507f38 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9594,12 +9594,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
   return false;
 }
 
-bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
+bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
+                     bool IsLittleEndian) {
   assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
 
   BitMask.clearAllBits();
   EVT VT = BVN.getValueType(0);
-  APInt ConstValue(VT.getSizeInBits(), 0);
+  unsigned VTSize = VT.getSizeInBits();
+  APInt ConstValue(VTSize, 0);
 
   unsigned EltWidth = VT.getScalarSizeInBits();
 
@@ -9609,8 +9611,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
 
     if (!CN)
       return false;
-
-    ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+    // The elements in a vector register are ordered in reverse byte order
+    // between little-endian and big-endian modes.
+    ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
+                          IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
     BitPos += EltWidth;
   }
 
@@ -9641,7 +9645,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     // we do not convert it to MTVSRBMI.
     // The xxleqv instruction sets a vector with all ones.
     // The xxlxor instruction sets a vector with all zeros.
-    if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
+    if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
+        BitMask != 0 && BitMask != 0xffff) {
       SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
       MachineSDNode *MSDNode =
           DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 232014db9a012..a9503f77c3090 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -2,22 +2,87 @@
 ; Verify whether the generated assembly for the following function includes the mtvsrbmi instruction.
 ; vector unsigned char v00FF()
 ; {
-; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
-; return x;
+;   vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
+;   return x;
+; }
+; vector unsigned short short00FF()
+; {
+;   vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
+;   return x;
+; }
+; vector unsigned int int00FF()
+; {
+;   vector unsigned int x = { 0xFF, 0,0,0};
+;   return x;
+; }
+; vector unsigned long long  longlong00FF()
+; {
+;   vector unsigned long long x = { 0xFF, 0};
+;   return x;
 ; }
 
 ; RUN: llc < %s -ppc-asm-full-reg-names  -mtriple=powerpc-ibm-aix -mcpu=pwr10  -verify-machineinstrs \
-; RUN:   | FileCheck %s --check-prefix=CHECK
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; RUN: llc < %s -ppc-asm-full-reg-names  -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10  -verify-machineinstrs \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+; CHECK-NOT:   .byte   255
+; CHECK-NOT:   .byte   0
 
 define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK-NOT:      L..CPI0_0:
-; CHECK-NOT:   .byte   255                             # 0xff
-; CHECK-NOT:   .byte   0                               # 0x0
-
-; CHECK-LABEL: _Z5v00FFv:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mtvsrbmi v2, 1
-; CHECK-NEXT:    blr
+; CHECK-BE-LABEL: _Z5v00FFv:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrbmi v2, 32768
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-LE-LABEL: _Z5v00FFv:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtvsrbmi v2, 1
+; CHECK-LE-NEXT:    blr
+
 entry:
   ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
 }
+
+define dso_local noundef range(i16 0, 256) <8 x i16> @_Z9short00FFv() {
+; CHECK-BE-LABEL: _Z9short00FFv:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrbmi v2, 16384
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-LE-LABEL: _Z9short00FFv:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtvsrbmi v2, 1
+; CHECK-LE-NEXT:    blr
+entry:
+	  ret <8 x i16> <i16 255, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+}
+
+define dso_local noundef range(i32 0, 256) <4 x i32> @_Z7int00FFv() {
+; CHECK-BE-LABEL: _Z7int00FFv:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrbmi v2, 4096
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-LE-LABEL: _Z7int00FFv:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtvsrbmi v2, 1
+; CHECK-LE-NEXT:    blr
+entry:
+	  ret <4 x i32> <i32 255, i32 0, i32 0, i32 0>
+}
+
+define dso_local noundef range(i64 0, 256) <2 x i64> @_Z12longlong00FFv() {
+; CHECK-BE-LABEL: _Z12longlong00FFv:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrbmi v2, 256
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-LE-LABEL: _Z12longlong00FFv:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtvsrbmi v2, 1
+; CHECK-LE-NEXT:    blr
+entry:
+	  ret <2 x i64> <i64 255, i64 0>
+}

@amy-kwan
Copy link
Contributor

FYI - requesting backport as the original issue (45909ec) is also in the LLVM 21.x

@amy-kwan amy-kwan requested a review from RolandF77 August 18, 2025 16:25
Copy link
Contributor

@diggerlin diggerlin left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@github-project-automation github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Aug 19, 2025
…for big-endian (llvm#151565)

The patch fixed a bug introduced patch [[PowePC] using MTVSRBMI
instruction instead of constant pool in
power10+](llvm#144084 (comment)).

The issue arose because the layout of vector register elements differs
between little-endian and big-endian modes — specifically, the elements
appear in reverse order. This led to incorrect behavior when loading
constants using MTVSRBMI in big-endian configurations.

(cherry picked from commit 23b3203)
@tru tru merged commit 2cf40a6 into llvm:release/21.x Aug 21, 2025
1 check was pending
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Aug 21, 2025
@github-actions
Copy link

@amy-kwan (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

Development

Successfully merging this pull request may close these issues.

4 participants