Skip to content

Conversation

@diggerlin
Copy link
Contributor

@diggerlin diggerlin commented Dec 3, 2025

Fixed issue
[PowerPC] llc crashed at -O1/O2/O3: Assertion `isImm() && "Wrong MachineOperand mutator"' failed.

the root cause of the crash, the IMM operand is in different operand num of the instruction PPC::XXSPLTW and PPC::XXSPLTB/PPC::XXSPLTH.

and the patch also fix a potential bug that the new element index of PPC::XXSPLTB/PPC::XXSPLTH/XXSPLTW use the same logic. It should be different .We need to convert the element index into the proper unit (byte for VSPLTB, halfword for VSPLTH, word for VSPLTW) because PPC::XXSLDWI interprets its ShiftImm in 32-bit word units.

@llvmbot
Copy link
Member

llvmbot commented Dec 3, 2025

@llvm/pr-subscribers-backend-powerpc

Author: zhijian lin (diggerlin)

Changes

Fixed issue
[PowerPC] llc crashed at -O1/O2/O3: Assertion `isImm() && "Wrong MachineOperand mutator"' failed.

the root cause of the crash, the IMM operand is different operand num in the instruction


Full diff: https://github.com/llvm/llvm-project/pull/170548.diff

2 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (+30-4)
  • (added) llvm/test/CodeGen/PowerPC/splat-after-xxsldwi.ll (+22)
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 18350650bfe2d..a45d1f3a72d7e 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -797,7 +797,12 @@ bool PPCMIPeephole::simplifyCode() {
       case PPC::VSPLTH:
       case PPC::XXSPLTW: {
         unsigned MyOpcode = MI.getOpcode();
+
+        // The operand number of the source register in the splat instruction.
         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
+
+        // The operand number of the splat Imm in the instruction.
+        unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
         Register TrueReg =
           TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
         if (!TrueReg.isVirtual())
@@ -837,15 +842,36 @@ bool PPCMIPeephole::simplifyCode() {
         }
         // Splat fed by a shift. Usually when we align value to splat into
         // vector element zero.
+
         if (DefOpcode == PPC::XXSLDWI) {
           Register ShiftRes = DefMI->getOperand(0).getReg();
           Register ShiftOp1 = DefMI->getOperand(1).getReg();
           Register ShiftOp2 = DefMI->getOperand(2).getReg();
           unsigned ShiftImm = DefMI->getOperand(3).getImm();
-          unsigned SplatImm =
-              MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
+          unsigned SplatImm = MI.getOperand(SplatImmNo).getImm();
+
           if (ShiftOp1 == ShiftOp2) {
-            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
+            // Calculate the new splat-element immediate. We need to convert the
+            // element index into the proper unit (byte for VSPLTB, halfword for
+            // VSPLTH, word for VSPLTW) because PPC::XXSLDWI interprets its
+            // ShiftImm in 32-bit word units.
+            unsigned NewElem = 0;
+            if (MyOpcode == PPC::VSPLTB)
+              NewElem = (SplatImm + ShiftImm * 4) & 0xF;
+            else if (MyOpcode == PPC::VSPLTH)
+              NewElem = (SplatImm + ShiftImm * 2) & 0x7;
+            else
+              NewElem = (SplatImm + ShiftImm) & 0x3;
+
+            // For example, We can erase XXSLDWI from in following:
+            //    %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
+            //    %6:vrrc = VSPLTB 15, killed %2:vrrc
+            //    %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
+            //
+            // --->
+            //
+            //     %6:vrrc = VSPLTB 3, killed %1:vrrc
+            //     %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
             if (MRI->hasOneNonDBGUse(ShiftRes)) {
               LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
               LLVM_DEBUG(DefMI->dump());
@@ -858,7 +884,7 @@ bool PPCMIPeephole::simplifyCode() {
             addRegToUpdate(MI.getOperand(OpNo).getReg());
             addRegToUpdate(ShiftOp1);
             MI.getOperand(OpNo).setReg(ShiftOp1);
-            MI.getOperand(2).setImm(NewElem);
+            MI.getOperand(SplatImmNo).setImm(NewElem);
           }
         }
         break;
diff --git a/llvm/test/CodeGen/PowerPC/splat-after-xxsldwi.ll b/llvm/test/CodeGen/PowerPC/splat-after-xxsldwi.ll
new file mode 100644
index 0000000000000..d0e96e3fbbf56
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/splat-after-xxsldwi.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s  | \
+; RUN:   FileCheck %s
+
+
+define <4 x i8> @backsmith_pure_1(<8 x i32> %0) {
+; CHECK-LABEL: backsmith_pure_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-NEXT:    xxsldwi vs34, vs35, vs35, 1
+; CHECK-NEXT:    lxvw4x vs36, 0, r3
+; CHECK-NEXT:    vspltb v3, v3, 3
+; CHECK-NEXT:    vperm v2, v2, v2, v4
+; CHECK-NEXT:    xxland vs34, vs35, vs34
+; CHECK-NEXT:    blr
+entry:
+  %shuffle = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
+  %conv4 = trunc <4 x i32> %shuffle to <4 x i8>
+  %shift = shufflevector <4 x i8> %conv4, <4 x i8> zeroinitializer, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
+  %foldExtExtBinop = and <4 x i8> %shift, %conv4
+  ret <4 x i8> %foldExtExtBinop
+}

@zwuis
Copy link
Contributor

zwuis commented Dec 4, 2025

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants