-
Notifications
You must be signed in to change notification settings - Fork 15.4k
release/21.x: [POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian (#151565) #154138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@diggerlin What do you think about merging this PR to the release branch? |
|
@llvm/pr-subscribers-backend-powerpc Author: None (llvmbot) ChangesBackport 23b3203 Requested by: @amy-kwan Full diff: https://github.com/llvm/llvm-project/pull/154138.diff 2 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f179873b4dbd2..67f59ed507f38 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9594,12 +9594,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return false;
}
-bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
+bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
+ bool IsLittleEndian) {
assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
BitMask.clearAllBits();
EVT VT = BVN.getValueType(0);
- APInt ConstValue(VT.getSizeInBits(), 0);
+ unsigned VTSize = VT.getSizeInBits();
+ APInt ConstValue(VTSize, 0);
unsigned EltWidth = VT.getScalarSizeInBits();
@@ -9609,8 +9611,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
if (!CN)
return false;
-
- ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ // The elements in a vector register are ordered in reverse byte order
+ // between little-endian and big-endian modes.
+ ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
+ IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
BitPos += EltWidth;
}
@@ -9641,7 +9645,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// we do not convert it to MTVSRBMI.
// The xxleqv instruction sets a vector with all ones.
// The xxlxor instruction sets a vector with all zeros.
- if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
+ if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
+ BitMask != 0 && BitMask != 0xffff) {
SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
MachineSDNode *MSDNode =
DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 232014db9a012..a9503f77c3090 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -2,22 +2,87 @@
; Verify whether the generated assembly for the following function includes the mtvsrbmi instruction.
; vector unsigned char v00FF()
; {
-; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
-; return x;
+; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
+; return x;
+; }
+; vector unsigned short short00FF()
+; {
+; vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
+; return x;
+; }
+; vector unsigned int int00FF()
+; {
+; vector unsigned int x = { 0xFF, 0,0,0};
+; return x;
+; }
+; vector unsigned long long longlong00FF()
+; {
+; vector unsigned long long x = { 0xFF, 0};
+; return x;
; }
; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr10 -verify-machineinstrs \
-; RUN: | FileCheck %s --check-prefix=CHECK
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+; CHECK-NOT: .byte 255
+; CHECK-NOT: .byte 0
define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK-NOT: L..CPI0_0:
-; CHECK-NOT: .byte 255 # 0xff
-; CHECK-NOT: .byte 0 # 0x0
-
-; CHECK-LABEL: _Z5v00FFv:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mtvsrbmi v2, 1
-; CHECK-NEXT: blr
+; CHECK-BE-LABEL: _Z5v00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 32768
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z5v00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+
entry:
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}
+
+define dso_local noundef range(i16 0, 256) <8 x i16> @_Z9short00FFv() {
+; CHECK-BE-LABEL: _Z9short00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 16384
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z9short00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+entry:
+ ret <8 x i16> <i16 255, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+}
+
+define dso_local noundef range(i32 0, 256) <4 x i32> @_Z7int00FFv() {
+; CHECK-BE-LABEL: _Z7int00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 4096
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z7int00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+entry:
+ ret <4 x i32> <i32 255, i32 0, i32 0, i32 0>
+}
+
+define dso_local noundef range(i64 0, 256) <2 x i64> @_Z12longlong00FFv() {
+; CHECK-BE-LABEL: _Z12longlong00FFv:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrbmi v2, 256
+; CHECK-BE-NEXT: blr
+;
+; CHECK-LE-LABEL: _Z12longlong00FFv:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtvsrbmi v2, 1
+; CHECK-LE-NEXT: blr
+entry:
+ ret <2 x i64> <i64 255, i64 0>
+}
|
|
FYI - requesting backport as the original issue (45909ec) is also in the LLVM 21.x |
diggerlin
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…for big-endian (llvm#151565) The patch fixed a bug introduced patch [[PowePC] using MTVSRBMI instruction instead of constant pool in power10+](llvm#144084 (comment)). The issue arose because the layout of vector register elements differs between little-endian and big-endian modes — specifically, the elements appear in reverse order. This led to incorrect behavior when loading constants using MTVSRBMI in big-endian configurations. (cherry picked from commit 23b3203)
|
@amy-kwan (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 23b3203
Requested by: @amy-kwan