Skip to content

Commit 74fa498

Browse files
diggerlinkrishna2803
authored and committed
[POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian (llvm#151565)
This patch fixes a bug introduced by the patch [[PowerPC] using MTVSRBMI instruction instead of constant pool in power10+](llvm#144084 (comment)). The issue arose because the layout of vector register elements differs between little-endian and big-endian modes — specifically, the elements appear in reverse order. This led to incorrect behavior when loading constants using MTVSRBMI in big-endian configurations.
1 parent a29e45b commit 74fa498

File tree

2 files changed

+86
-16
lines changed

2 files changed

+86
-16
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9593,12 +9593,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
95939593
return false;
95949594
}
95959595

9596-
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
9596+
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9597+
bool IsLittleEndian) {
95979598
assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
95989599

95999600
BitMask.clearAllBits();
96009601
EVT VT = BVN.getValueType(0);
9601-
APInt ConstValue(VT.getSizeInBits(), 0);
9602+
unsigned VTSize = VT.getSizeInBits();
9603+
APInt ConstValue(VTSize, 0);
96029604

96039605
unsigned EltWidth = VT.getScalarSizeInBits();
96049606

@@ -9608,8 +9610,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
96089610

96099611
if (!CN)
96109612
return false;
9611-
9612-
ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
9613+
// The elements in a vector register are ordered in reverse byte order
9614+
// between little-endian and big-endian modes.
9615+
ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9616+
IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
96139617
BitPos += EltWidth;
96149618
}
96159619

@@ -9640,7 +9644,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
96409644
// we do not convert it to MTVSRBMI.
96419645
// The xxleqv instruction sets a vector with all ones.
96429646
// The xxlxor instruction sets a vector with all zeros.
9643-
if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
9647+
if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9648+
BitMask != 0 && BitMask != 0xffff) {
96449649
SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
96459650
MachineSDNode *MSDNode =
96469651
DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);

llvm/test/CodeGen/PowerPC/mtvsrbmi.ll

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,87 @@
22
; Verify whether the generated assembly for the following functions includes the mtvsrbmi instruction.
33
; vector unsigned char v00FF()
44
; {
5-
; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
6-
; return x;
5+
; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
6+
; return x;
7+
; }
8+
; vector unsigned short short00FF()
9+
; {
10+
; vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
11+
; return x;
12+
; }
13+
; vector unsigned int int00FF()
14+
; {
15+
; vector unsigned int x = { 0xFF, 0,0,0};
16+
; return x;
17+
; }
18+
; vector unsigned long long longlong00FF()
19+
; {
20+
; vector unsigned long long x = { 0xFF, 0};
21+
; return x;
722
; }
823

924
; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr10 -verify-machineinstrs \
10-
; RUN: | FileCheck %s --check-prefix=CHECK
25+
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-BE
26+
27+
; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10 -verify-machineinstrs \
28+
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-LE
29+
30+
; CHECK-NOT: .byte 255
31+
; CHECK-NOT: .byte 0
1132

1233
define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
13-
; CHECK-NOT: L..CPI0_0:
14-
; CHECK-NOT: .byte 255 # 0xff
15-
; CHECK-NOT: .byte 0 # 0x0
16-
17-
; CHECK-LABEL: _Z5v00FFv:
18-
; CHECK: # %bb.0: # %entry
19-
; CHECK-NEXT: mtvsrbmi v2, 1
20-
; CHECK-NEXT: blr
34+
; CHECK-BE-LABEL: _Z5v00FFv:
35+
; CHECK-BE: # %bb.0: # %entry
36+
; CHECK-BE-NEXT: mtvsrbmi v2, 32768
37+
; CHECK-BE-NEXT: blr
38+
;
39+
; CHECK-LE-LABEL: _Z5v00FFv:
40+
; CHECK-LE: # %bb.0: # %entry
41+
; CHECK-LE-NEXT: mtvsrbmi v2, 1
42+
; CHECK-LE-NEXT: blr
43+
2144
entry:
2245
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
2346
}
47+
48+
define dso_local noundef range(i16 0, 256) <8 x i16> @_Z9short00FFv() {
49+
; CHECK-BE-LABEL: _Z9short00FFv:
50+
; CHECK-BE: # %bb.0: # %entry
51+
; CHECK-BE-NEXT: mtvsrbmi v2, 16384
52+
; CHECK-BE-NEXT: blr
53+
;
54+
; CHECK-LE-LABEL: _Z9short00FFv:
55+
; CHECK-LE: # %bb.0: # %entry
56+
; CHECK-LE-NEXT: mtvsrbmi v2, 1
57+
; CHECK-LE-NEXT: blr
58+
entry:
59+
ret <8 x i16> <i16 255, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
60+
}
61+
62+
define dso_local noundef range(i32 0, 256) <4 x i32> @_Z7int00FFv() {
63+
; CHECK-BE-LABEL: _Z7int00FFv:
64+
; CHECK-BE: # %bb.0: # %entry
65+
; CHECK-BE-NEXT: mtvsrbmi v2, 4096
66+
; CHECK-BE-NEXT: blr
67+
;
68+
; CHECK-LE-LABEL: _Z7int00FFv:
69+
; CHECK-LE: # %bb.0: # %entry
70+
; CHECK-LE-NEXT: mtvsrbmi v2, 1
71+
; CHECK-LE-NEXT: blr
72+
entry:
73+
ret <4 x i32> <i32 255, i32 0, i32 0, i32 0>
74+
}
75+
76+
define dso_local noundef range(i64 0, 256) <2 x i64> @_Z12longlong00FFv() {
77+
; CHECK-BE-LABEL: _Z12longlong00FFv:
78+
; CHECK-BE: # %bb.0: # %entry
79+
; CHECK-BE-NEXT: mtvsrbmi v2, 256
80+
; CHECK-BE-NEXT: blr
81+
;
82+
; CHECK-LE-LABEL: _Z12longlong00FFv:
83+
; CHECK-LE: # %bb.0: # %entry
84+
; CHECK-LE-NEXT: mtvsrbmi v2, 1
85+
; CHECK-LE-NEXT: blr
86+
entry:
87+
ret <2 x i64> <i64 255, i64 0>
88+
}

0 commit comments

Comments
 (0)