Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9586,12 +9586,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return false;
}

bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
bool IsLittleEndian) {
assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");

BitMask.clearAllBits();
EVT VT = BVN.getValueType(0);
APInt ConstValue(VT.getSizeInBits(), 0);
unsigned VTSize = VT.getSizeInBits();
APInt ConstValue(VTSize, 0);

unsigned EltWidth = VT.getScalarSizeInBits();

Expand All @@ -9601,8 +9603,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {

if (!CN)
return false;

ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
// Vector elements are laid out in opposite order in little-endian and
// big-endian modes, so mirror the insertion position for big-endian.
ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
BitPos += EltWidth;
}

Expand Down Expand Up @@ -9633,7 +9637,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// we do not convert it to MTVSRBMI.
// The xxleqv instruction sets a vector with all ones.
// The xxlxor instruction sets a vector with all zeros.
if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
BitMask != 0 && BitMask != 0xffff) {
SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
MachineSDNode *MSDNode =
DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
Expand Down
87 changes: 76 additions & 11 deletions llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,87 @@
; Verify that the generated assembly for the following functions includes the mtvsrbmi instruction.
; vector unsigned char v00FF()
; {
; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
; return x;
; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
; return x;
; }
; vector unsigned short short00FF()
; {
; vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
; return x;
; }
; vector unsigned int int00FF()
; {
; vector unsigned int x = { 0xFF, 0,0,0};
; return x;
; }
; vector unsigned long long longlong00FF()
; {
; vector unsigned long long x = { 0xFF, 0};
; return x;
; }

; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr10 -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefix=CHECK
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-BE

; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10 -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-LE

; CHECK-NOT: .byte 255
; CHECK-NOT: .byte 0
Comment on lines +30 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these auto generated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, these are not auto-generated.


define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
; CHECK-NOT: L..CPI0_0:
; CHECK-NOT: .byte 255 # 0xff
; CHECK-NOT: .byte 0 # 0x0

; CHECK-LABEL: _Z5v00FFv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrbmi v2, 1
; CHECK-NEXT: blr
; CHECK-BE-LABEL: _Z5v00FFv:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrbmi v2, 32768
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: _Z5v00FFv:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mtvsrbmi v2, 1
; CHECK-LE-NEXT: blr

entry:
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}

; short00FF: returns the constant <8 x i16> vector whose element 0 is 255
; (0x00FF) and whose remaining elements are 0.
; Both runs expect a single mtvsrbmi into v2 followed by blr; only the
; immediate differs: 16384 (0x4000) for the big-endian run and 1 for the
; little-endian run.
define dso_local noundef range(i16 0, 256) <8 x i16> @_Z9short00FFv() {
; CHECK-BE-LABEL: _Z9short00FFv:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrbmi v2, 16384
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: _Z9short00FFv:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mtvsrbmi v2, 1
; CHECK-LE-NEXT: blr
entry:
ret <8 x i16> <i16 255, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
}

; int00FF: returns the constant <4 x i32> vector whose element 0 is 255
; (0x000000FF) and whose remaining elements are 0.
; Both runs expect a single mtvsrbmi into v2 followed by blr; only the
; immediate differs: 4096 (0x1000) for the big-endian run and 1 for the
; little-endian run.
define dso_local noundef range(i32 0, 256) <4 x i32> @_Z7int00FFv() {
; CHECK-BE-LABEL: _Z7int00FFv:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrbmi v2, 4096
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: _Z7int00FFv:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mtvsrbmi v2, 1
; CHECK-LE-NEXT: blr
entry:
ret <4 x i32> <i32 255, i32 0, i32 0, i32 0>
}

; longlong00FF: returns the constant <2 x i64> vector whose element 0 is 255
; (0x00000000000000FF) and whose element 1 is 0.
; Both runs expect a single mtvsrbmi into v2 followed by blr; only the
; immediate differs: 256 (0x0100) for the big-endian run and 1 for the
; little-endian run.
define dso_local noundef range(i64 0, 256) <2 x i64> @_Z12longlong00FFv() {
; CHECK-BE-LABEL: _Z12longlong00FFv:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrbmi v2, 256
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: _Z12longlong00FFv:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mtvsrbmi v2, 1
; CHECK-LE-NEXT: blr
entry:
ret <2 x i64> <i64 255, i64 0>
}