Skip to content

Commit ed0258e

Browse files
committed
Fix a bug in the splat-after-xxsldwi peephole
1 parent d7cc82b commit ed0258e

File tree

2 files changed

+52
-4
lines changed

2 files changed

+52
-4
lines changed

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,12 @@ bool PPCMIPeephole::simplifyCode() {
797797
case PPC::VSPLTH:
798798
case PPC::XXSPLTW: {
799799
unsigned MyOpcode = MI.getOpcode();
800+
801+
// The operand number of the source register in the splat instruction.
800802
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
803+
804+
// The operand number of the splat Imm in the instruction.
805+
unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
801806
Register TrueReg =
802807
TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
803808
if (!TrueReg.isVirtual())
@@ -837,15 +842,36 @@ bool PPCMIPeephole::simplifyCode() {
837842
}
838843
// Splat fed by a shift. This usually happens when we align the value to
839844
// be splatted into vector element zero.
845+
840846
if (DefOpcode == PPC::XXSLDWI) {
841847
Register ShiftRes = DefMI->getOperand(0).getReg();
842848
Register ShiftOp1 = DefMI->getOperand(1).getReg();
843849
Register ShiftOp2 = DefMI->getOperand(2).getReg();
844850
unsigned ShiftImm = DefMI->getOperand(3).getImm();
845-
unsigned SplatImm =
846-
MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
851+
unsigned SplatImm = MI.getOperand(SplatImmNo).getImm();
852+
847853
if (ShiftOp1 == ShiftOp2) {
848-
unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
854+
// Calculate the new splat-element immediate. PPC::XXSLDWI interprets its
855+
// ShiftImm in 32-bit word units, so the shift amount must be converted
856+
// into the splat's own element unit (bytes for VSPLTB, halfwords for
857+
// VSPLTH, words for XXSPLTW) before it is added to the splat immediate.
858+
unsigned NewElem = 0;
859+
if (MyOpcode == PPC::VSPLTB)
860+
NewElem = (SplatImm + ShiftImm * 4) & 0xF;
861+
else if (MyOpcode == PPC::VSPLTH)
862+
NewElem = (SplatImm + ShiftImm * 2) & 0x7;
863+
else
864+
NewElem = (SplatImm + ShiftImm) & 0x3;
865+
866+
// For example, we can erase the XXSLDWI in the following:
867+
// %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
868+
// %6:vrrc = VSPLTB 15, killed %2:vrrc
869+
// %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
870+
//
871+
// --->
872+
//
873+
// %6:vrrc = VSPLTB 3, killed %1:vrrc
874+
// %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
849875
if (MRI->hasOneNonDBGUse(ShiftRes)) {
850876
LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
851877
LLVM_DEBUG(DefMI->dump());
@@ -858,7 +884,7 @@ bool PPCMIPeephole::simplifyCode() {
858884
addRegToUpdate(MI.getOperand(OpNo).getReg());
859885
addRegToUpdate(ShiftOp1);
860886
MI.getOperand(OpNo).setReg(ShiftOp1);
861-
MI.getOperand(2).setImm(NewElem);
887+
MI.getOperand(SplatImmNo).setImm(NewElem);
862888
}
863889
}
864890
break;
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
3+
; RUN: FileCheck %s
4+
5+
6+
define <4 x i8> @backsmith_pure_1(<8 x i32> %0) {
7+
; CHECK-LABEL: backsmith_pure_1:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: ld r3, L..C0(r2) # %const.0
10+
; CHECK-NEXT: xxsldwi vs34, vs35, vs35, 1
11+
; CHECK-NEXT: lxvw4x vs36, 0, r3
12+
; CHECK-NEXT: vspltb v3, v3, 3
13+
; CHECK-NEXT: vperm v2, v2, v2, v4
14+
; CHECK-NEXT: xxland vs34, vs35, vs34
15+
; CHECK-NEXT: blr
16+
entry:
17+
%shuffle = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
18+
%conv4 = trunc <4 x i32> %shuffle to <4 x i8>
19+
%shift = shufflevector <4 x i8> %conv4, <4 x i8> zeroinitializer, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
20+
%foldExtExtBinop = and <4 x i8> %shift, %conv4
21+
ret <4 x i8> %foldExtExtBinop
22+
}

0 commit comments

Comments
 (0)