Skip to content

Conversation

@lei137
Copy link
Contributor

@lei137 lei137 commented Jun 16, 2025

Corrects the erroneous assumption that CR-logical operation's operands are always defined by a subreg copy.

Fixes #141643
Patch by Nemanja Ivanovic

@lei137 lei137 requested review from arsenm and nemanjai June 16, 2025 17:59
@lei137 lei137 self-assigned this Jun 16, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 16, 2025

@llvm/pr-subscribers-backend-powerpc

Author: Lei Huang (lei137)

Changes

Corrects the erroneous assumption that CR-logical operation's operands are always defined by a subreg copy.

Fixes #141643
Patch by Nemanja Ivanovic


Full diff: https://github.com/llvm/llvm-project/pull/144405.diff

2 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp (+20-17)
  • (added) llvm/test/CodeGen/PowerPC/crreduce-reg.mir (+66)
diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index ec20ad2565e68..c28ff4c468ad2 100644
--- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -108,6 +108,8 @@ struct BlockSplitInfo {
   MachineInstr *OrigBranch;
   MachineInstr *SplitBefore;
   MachineInstr *SplitCond;
+  unsigned OrigSubreg;
+  unsigned SplitCondSubreg;
   bool InvertNewBranch;
   bool InvertOrigBranch;
   bool BranchToFallThrough;
@@ -220,7 +222,7 @@ static bool splitMBB(BlockSplitInfo &BSI) {
   // Add the branches to ThisMBB.
   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
           TII->get(NewBROpcode))
-      .addReg(BSI.SplitCond->getOperand(0).getReg())
+      .addReg(BSI.SplitCond->getOperand(0).getReg(), 0, BSI.SplitCondSubreg)
       .addMBB(NewBRTarget);
   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
           TII->get(PPC::B))
@@ -234,6 +236,7 @@ static bool splitMBB(BlockSplitInfo &BSI) {
     assert(FirstTerminator->getOperand(0).isReg() &&
            "Can't update condition of unconditional branch.");
     FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
+    FirstTerminator->getOperand(0).setSubReg(BSI.OrigSubreg);
   }
   if (BSI.InvertOrigBranch)
     FirstTerminator->setDesc(TII->get(InvertedOpcode));
@@ -471,6 +474,8 @@ PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
   } else {
     MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
                                            Ret.SubregDef1, Ret.CopyDefs.first);
+    if (Ret.SubregDef1 == 0)
+      Ret.SubregDef1 = MIParam.getOperand(1).getSubReg();
     assert(Def1 && "Must be able to find a definition of operand 1.");
     Ret.DefsSingleUse &=
       MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
@@ -481,6 +486,8 @@ PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
       MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
                                              Ret.SubregDef2,
                                              Ret.CopyDefs.second);
+      if (Ret.SubregDef2 == 0)
+        Ret.SubregDef2 = MIParam.getOperand(2).getSubReg();
       assert(Def2 && "Must be able to find a definition of operand 2.");
       Ret.DefsSingleUse &=
         MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
@@ -535,7 +542,6 @@ PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
 MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
                                                      unsigned &Subreg,
                                                      MachineInstr *&CpDef) {
-  Subreg = -1;
   if (!Register::isVirtualRegister(Reg))
     return nullptr;
   MachineInstr *Copy = MRI->getVRegDef(Reg);
@@ -543,18 +549,10 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
   if (!Copy->isCopy())
     return Copy;
   Register CopySrc = Copy->getOperand(1).getReg();
-  Subreg = Copy->getOperand(1).getSubReg();
+  // If the copy defines a register with a subreg, set that as the Subreg.
+  Subreg = Copy->getOperand(0).getSubReg();
   if (!CopySrc.isVirtual()) {
     const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
-    // Set the Subreg
-    if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
-      Subreg = PPC::sub_eq;
-    if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT)
-      Subreg = PPC::sub_lt;
-    if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT)
-      Subreg = PPC::sub_gt;
-    if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN)
-      Subreg = PPC::sub_un;
     // Loop backwards and return the first MI that modifies the physical CR Reg.
     MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
     while (Me != B)
@@ -682,16 +680,21 @@ bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
   computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
                                   InvertNewBranch, InvertOrigBranch,
                                   TargetIsFallThrough);
-  MachineInstr *SplitCond =
-    UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first;
+  MachineInstr *NewCond = CRI.CopyDefs.first;
+  MachineInstr *SplitCond = CRI.CopyDefs.second;
+  if (!UsingDef1) {
+    std::swap(NewCond, SplitCond);
+    std::swap(CRI.SubregDef1, CRI.SubregDef2);
+  }
   LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy"));
   LLVM_DEBUG(dbgs() << " the original branch and the target is the "
                     << (TargetIsFallThrough ? "fallthrough block\n"
                                             : "orig. target block\n"));
   LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
-  BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch,
-    InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI,
-    UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second };
+  BlockSplitInfo BSI{
+      Branch,         SplitBefore,     SplitCond,        CRI.SubregDef1,
+      CRI.SubregDef2, InvertNewBranch, InvertOrigBranch, TargetIsFallThrough,
+      MBPI,           CRI.MI,          NewCond};
   bool Changed = splitMBB(BSI);
   // If we've split on a CR logical that is fed by a CR logical,
   // recompute the source CR logical as it may be usable for splitting.
diff --git a/llvm/test/CodeGen/PowerPC/crreduce-reg.mir b/llvm/test/CodeGen/PowerPC/crreduce-reg.mir
new file mode 100644
index 0000000000000..1b6045d82f9dd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/crreduce-reg.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=powerpc64-unknown-linux-gnu -run-pass=ppc-reduce-cr-ops \
+# RUN:   -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            subreg_folding_regression
+tracksRegLiveness: true
+isSSA:           true
+body:             |
+  ; CHECK-LABEL: name: subreg_folding_regression
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+  ; CHECK-NEXT:   [[LD:%[0-9]+]]:g8rc = LD 0, [[COPY]] :: (load (s64))
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x60000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[LD]], %bb.0, %3, %bb.3, %4, %bb.2
+  ; CHECK-NEXT:   [[LBZ:%[0-9]+]]:gprc = LBZ 0, [[PHI]] :: (load (s8))
+  ; CHECK-NEXT:   [[CMPWI:%[0-9]+]]:crrc = CMPWI killed [[LBZ]], 0
+  ; CHECK-NEXT:   [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[PHI]], 1
+  ; CHECK-NEXT:   STD [[ADDI8_]], 0, [[COPY]] :: (store (s64))
+  ; CHECK-NEXT:   [[LBZ1:%[0-9]+]]:gprc = LBZ 1, [[PHI]] :: (load (s8))
+  ; CHECK-NEXT:   BCn [[CMPWI]].sub_lt, %bb.2
+  ; CHECK-NEXT:   B %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[CMPWI1:%[0-9]+]]:crrc = CMPWI killed [[LBZ1]], 0
+  ; CHECK-NEXT:   BC killed [[CMPWI1]].sub_eq, %bb.1
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[ADDI8_1:%[0-9]+]]:g8rc = nuw ADDI8 [[PHI]], 2
+  ; CHECK-NEXT:   STD [[ADDI8_1]], 0, [[COPY]] :: (store (s64))
+  ; CHECK-NEXT:   B %bb.1
+  bb.0:
+    liveins: $x3
+
+    %0:g8rc_and_g8rc_nox0 = COPY $x3
+    %1:g8rc = LD 0, %0 :: (load (s64))
+
+  bb.1:
+    %2:g8rc_and_g8rc_nox0 = PHI %1, %bb.0, %3, %bb.1, %4, %bb.2
+    %5:gprc = LBZ 0, %2 :: (load (s8))
+    %6:crrc = CMPWI killed %5, 0
+    %3:g8rc = nuw ADDI8 %2, 1
+    STD %3, 0, %0 :: (store (s64))
+    %7:gprc = LBZ 1, %2 :: (load (s8))
+    %8:crrc = CMPWI killed %7, 0
+    %9:crbitrc = CRAND %8.sub_eq, %6.sub_lt
+    BC killed %9, %bb.1
+    B %bb.2
+
+  bb.2:
+    %4:g8rc = nuw ADDI8 %2, 2
+    STD %4, 0, %0 :: (store (s64))
+    B %bb.1
+
+...

Comment on lines 552 to 553
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be an SSA pass, and subregister defs are illegal. This cannot happen

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes. OK, I think the Subreg handling should completely be removed from this function.

Comment on lines 489 to 490
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are 2 subregisters, do you need to compose? I'd expect something like this to be unconditional or a compose

Comment on lines 552 to 553
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes. OK, I think the Subreg handling should completely be removed from this function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the handling for Subreg is removed from lookThroughCRCopy(), this condition and the one on line 489 can just go away and the assignments to Ret.SubregDef1 and Ret.SubregDef2 can be unconditional.

@lei137 lei137 requested review from arsenm and nemanjai June 18, 2025 17:57
@lei137
Copy link
Contributor Author

lei137 commented Jun 23, 2025

gentle ping

lei137 added 2 commits July 2, 2025 17:12
Corrects the erroneous assumption that CR-logical operation's operands are
always defined by a subreg copy.

Fixes llvm#141643 (comment)
Patch by Nemanja Ivanovic
@lei137 lei137 merged commit 0a822f8 into llvm:main Jul 2, 2025
7 of 9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 2, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/15479

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5b347bb99130 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 84]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5b347bb99130 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 84] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

@lei137 lei137 deleted the issue141643 branch September 5, 2025 15:46
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

ppc-reduce-cr-ops mishandles subregister uses and emits verifier error

5 participants