Skip to content

Commit d1ad085

Browse files
authored
Fix [PowerPC] llc crashed at -O1/O2/O3: Assertion `isImm() && "Wrong MachineOperand mutator"' failed. (#170548)
Fixes issue [[PowerPC] llc crashed at -O1/O2/O3: Assertion `isImm() && "Wrong MachineOperand mutator"' failed.](#167672). The root cause of the crash is that the immediate operand sits at a different operand index in PPC::XXSPLTW (operand 2) than in PPC::VSPLTB/PPC::VSPLTH (operand 1), while the old code always updated operand 2. This patch also fixes a latent bug: the new element index for PPC::VSPLTB, PPC::VSPLTH and PPC::XXSPLTW was computed with the same logic, but it must differ per instruction. The element index has to be converted into the proper unit (byte for VSPLTB, halfword for VSPLTH, word for XXSPLTW) because PPC::XXSLDWI interprets its ShiftImm in 32-bit word units.
1 parent 447af32 commit d1ad085

File tree

2 files changed

+59
-9
lines changed

2 files changed

+59
-9
lines changed

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,7 @@ bool PPCMIPeephole::simplifyCode() {
797797
case PPC::VSPLTH:
798798
case PPC::XXSPLTW: {
799799
unsigned MyOpcode = MI.getOpcode();
800+
// The operand number of the source register in the splat instruction.
800801
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
801802
Register TrueReg =
802803
TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
@@ -823,6 +824,7 @@ bool PPCMIPeephole::simplifyCode() {
823824
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
824825
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
825826
(MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
827+
826828
// If the instruction[s] that feed this splat have already splat
827829
// the value, this splat is redundant.
828830
if (AlreadySplat) {
@@ -835,30 +837,56 @@ bool PPCMIPeephole::simplifyCode() {
835837
ToErase = &MI;
836838
Simplified = true;
837839
}
840+
838841
// Splat fed by a shift. Usually when we align value to splat into
839842
// vector element zero.
840843
if (DefOpcode == PPC::XXSLDWI) {
841-
Register ShiftRes = DefMI->getOperand(0).getReg();
842844
Register ShiftOp1 = DefMI->getOperand(1).getReg();
843-
Register ShiftOp2 = DefMI->getOperand(2).getReg();
844-
unsigned ShiftImm = DefMI->getOperand(3).getImm();
845-
unsigned SplatImm =
846-
MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
847-
if (ShiftOp1 == ShiftOp2) {
848-
unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
849-
if (MRI->hasOneNonDBGUse(ShiftRes)) {
845+
846+
if (ShiftOp1 == DefMI->getOperand(2).getReg()) {
847+
// For example, we can erase the XXSLDWI in the following:
848+
// %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
849+
// %6:vrrc = VSPLTB 15, killed %2:vrrc
850+
// %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
851+
//
852+
// --->
853+
//
854+
// %6:vrrc = VSPLTB 3, killed %1:vrrc
855+
// %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
856+
857+
if (MRI->hasOneNonDBGUse(DefMI->getOperand(0).getReg())) {
850858
LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
851859
LLVM_DEBUG(DefMI->dump());
852860
ToErase = DefMI;
853861
}
854862
Simplified = true;
863+
unsigned ShiftImm = DefMI->getOperand(3).getImm();
864+
// The operand number of the splat Imm in the instruction.
865+
unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
866+
unsigned SplatImm = MI.getOperand(SplatImmNo).getImm();
867+
868+
// Calculate the new splat-element immediate. We need to convert the
869+
// element index into the proper unit (byte for VSPLTB, halfword for
870+
// VSPLTH, word for XXSPLTW) because PPC::XXSLDWI interprets its
871+
// ShiftImm in 32-bit word units.
872+
auto CalculateNewElementIdx = [&](unsigned Opcode) {
873+
if (Opcode == PPC::VSPLTB)
874+
return (SplatImm + ShiftImm * 4) & 0xF;
875+
else if (Opcode == PPC::VSPLTH)
876+
return (SplatImm + ShiftImm * 2) & 0x7;
877+
else
878+
return (SplatImm + ShiftImm) & 0x3;
879+
};
880+
881+
unsigned NewElem = CalculateNewElementIdx(MyOpcode);
882+
855883
LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
856884
<< " to " << NewElem << " in instruction: ");
857885
LLVM_DEBUG(MI.dump());
858886
addRegToUpdate(MI.getOperand(OpNo).getReg());
859887
addRegToUpdate(ShiftOp1);
860888
MI.getOperand(OpNo).setReg(ShiftOp1);
861-
MI.getOperand(2).setImm(NewElem);
889+
MI.getOperand(SplatImmNo).setImm(NewElem);
862890
}
863891
}
864892
break;
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
3+
; RUN: FileCheck %s
4+
5+
6+
define <4 x i8> @backsmith_pure_1(<8 x i32> %0) {
7+
; CHECK-LABEL: backsmith_pure_1:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: ld r3, L..C0(r2) # %const.0
10+
; CHECK-NEXT: xxsldwi vs34, vs35, vs35, 1
11+
; CHECK-NEXT: lxvw4x vs36, 0, r3
12+
; CHECK-NEXT: vspltb v3, v3, 3
13+
; CHECK-NEXT: vperm v2, v2, v2, v4
14+
; CHECK-NEXT: xxland vs34, vs35, vs34
15+
; CHECK-NEXT: blr
16+
entry:
17+
%shuffle = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
18+
%conv4 = trunc <4 x i32> %shuffle to <4 x i8>
19+
%shift = shufflevector <4 x i8> %conv4, <4 x i8> zeroinitializer, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
20+
%foldExtExtBinop = and <4 x i8> %shift, %conv4
21+
ret <4 x i8> %foldExtExtBinop
22+
}

0 commit comments

Comments
 (0)