- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[Mips] Fix atomic min/max generate mips4 instructions when compiling for mips2 #159717
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
| @llvm/pr-subscribers-backend-mips Author: None (yingopq) Changes…for mips2 Fix #145411. Full diff: https://github.com/llvm/llvm-project/pull/159717.diff 2 Files Affected: 
 diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index 34ff41f6e02da..f37ccac0b396e 100644
--- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -746,20 +746,41 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
     llvm_unreachable("Unknown pseudo atomic!");
   }
 
+  bool NoMovnInstr = (IsMin || IsMax) && !STI->hasMips4() && !STI->hasMips32();
   const BasicBlock *LLVM_BB = BB.getBasicBlock();
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop1MBB;
+  MachineBasicBlock *loop2MBB;
+  if (NoMovnInstr) {
+    loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  }
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineFunction::iterator It = ++BB.getIterator();
   MF->insert(It, loopMBB);
+  if (NoMovnInstr) {
+    MF->insert(It, loop1MBB);
+    MF->insert(It, loop2MBB);
+  }
   MF->insert(It, exitMBB);
 
   exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
   exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
 
   BB.addSuccessor(loopMBB, BranchProbability::getOne());
-  loopMBB->addSuccessor(exitMBB);
-  loopMBB->addSuccessor(loopMBB);
+  if (NoMovnInstr) {
+    loopMBB->addSuccessor(loop1MBB);
+    loopMBB->addSuccessor(loop2MBB);
+  } else {
+    loopMBB->addSuccessor(exitMBB);
+    loopMBB->addSuccessor(loopMBB);
+  }
   loopMBB->normalizeSuccProbs();
+  if (NoMovnInstr) {
+    loop1MBB->addSuccessor(loop2MBB);
+    loop2MBB->addSuccessor(loopMBB);
+    loop2MBB->addSuccessor(exitMBB, BranchProbability::getOne());
+  }
 
   BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
   assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
@@ -802,7 +823,7 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
       BuildMI(loopMBB, DL, TII->get(OR), Scratch)
           .addReg(Scratch)
           .addReg(Scratch2);
-    } else {
+    } else if (STI->hasMips4() || STI->hasMips32()) {
       // max: move Scratch, OldVal
       //      movn Scratch, Incr, Scratch2, Scratch
       // min: move Scratch, OldVal
@@ -814,6 +835,38 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
           .addReg(Incr)
           .addReg(Scratch2)
           .addReg(Scratch);
+    } else {
+      // if min:
+      // loopMBB:  move Scratch, OldVal
+      //           beq Scratch2_32, 0, loop1MBB
+      //           j loop2MBB
+      // loop1MBB: move Scratch, Incr
+      // loop2MBB: sc $2, 0($4)
+      //           beqz	$2, $BB0_1
+      //           nop
+      //
+      // if max:
+      // loopMBB:  move Scratch, Incr
+      //           beq Scratch2_32, 0, loop1MBB
+      //           j loop2MBB
+      // loop1MBB: move Scratch, OldVal
+      // loop2MBB: sc $2, 0($4)
+      //           beqz	$2, $BB0_1
+      //           nop
+      if (IsMin) {
+        BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(OldVal).addReg(ZERO);
+        BuildMI(loop1MBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
+      } else {
+        BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
+        BuildMI(loop1MBB, DL, TII->get(OR), Scratch)
+            .addReg(OldVal)
+            .addReg(ZERO);
+      }
+      BuildMI(loopMBB, DL, TII->get(BEQ))
+          .addReg(Scratch2_32)
+          .addReg(ZERO)
+          .addMBB(loop1MBB);
+      BuildMI(loopMBB, DL, TII->get(Mips::J)).addMBB(loop2MBB);
     }
 
   } else if (Opcode) {
@@ -829,20 +882,35 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
     BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
   }
 
-  BuildMI(loopMBB, DL, TII->get(SC), Scratch)
-      .addReg(Scratch)
-      .addReg(Ptr)
-      .addImm(0);
-  BuildMI(loopMBB, DL, TII->get(BEQ))
-      .addReg(Scratch)
-      .addReg(ZERO)
-      .addMBB(loopMBB);
+  if (NoMovnInstr) {
+    BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
+        .addReg(Scratch)
+        .addReg(Ptr)
+        .addImm(0);
+    BuildMI(loop2MBB, DL, TII->get(BEQ))
+        .addReg(Scratch)
+        .addReg(ZERO)
+        .addMBB(loopMBB);
+  } else {
+    BuildMI(loopMBB, DL, TII->get(SC), Scratch)
+        .addReg(Scratch)
+        .addReg(Ptr)
+        .addImm(0);
+    BuildMI(loopMBB, DL, TII->get(BEQ))
+        .addReg(Scratch)
+        .addReg(ZERO)
+        .addMBB(loopMBB);
+  }
 
   NMBBI = BB.end();
   I->eraseFromParent();
 
   LivePhysRegs LiveRegs;
   computeAndAddLiveIns(LiveRegs, *loopMBB);
+  if (!STI->hasMips4() && !STI->hasMips32()) {
+    computeAndAddLiveIns(LiveRegs, *loop1MBB);
+    computeAndAddLiveIns(LiveRegs, *loop2MBB);
+  }
   computeAndAddLiveIns(LiveRegs, *exitMBB);
 
   return true;
diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll
index 85bf6d02c7d8f..d19952439176e 100644
--- a/llvm/test/CodeGen/Mips/atomic-min-max.ll
+++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll
@@ -12,6 +12,11 @@
 ; RUN: llc -mtriple=mips64-elf -O0 -mcpu=mips64r6 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPS64R6
 ; RUN: llc -mtriple=mips64el-elf -O0 -mcpu=mips64r2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPS64EL
 ; RUN: llc -mtriple=mips64el-elf -O0 -mcpu=mips64r6 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPS64ELR6
+;
+; // mips2 does not test i16 and i8.
+; RUN: sed -e '/define.*@\(test_max_16\|test_umax_16\|test_min_16\|test_umin_16\)/,/^}$/d' \
+; RUN:     -e '/define.*@\(test_max_8\|test_umax_8\|test_min_8\|test_umin_8\)/,/^}$/d' %s > %t.filtered.ll
+; RUN: llc -mtriple=mipsel-elf -O0 -mcpu=mips2 -verify-machineinstrs %t.filtered.ll -o - | FileCheck %s --check-prefix=MIPS2
 
 define i32 @test_max_32(ptr nocapture %ptr, i32 signext %val) {
 ; MIPS-LABEL: test_max_32:
@@ -31,6 +36,33 @@ define i32 @test_max_32(ptr nocapture %ptr, i32 signext %val) {
 ; MIPS-NEXT:    jr $ra
 ; MIPS-NEXT:    nop
 ;
+; MIPS2-LABEL: test_max_32:
+; MIPS2:       # %bb.0: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:  $BB0_1: # %entry
+; MIPS2-NEXT:    # =>This Inner Loop Header: Depth=1
+; MIPS2-NEXT:    ll $2, 0($4)
+; MIPS2-NEXT:    slt $3, $2, $5
+; MIPS2-NEXT:    move $1, $5
+; MIPS2-NEXT:    beqz $3, $BB0_3
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.2: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; MIPS2-NEXT:    j $BB0_4
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  $BB0_3: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; MIPS2-NEXT:    move $1, $2
+; MIPS2-NEXT:  $BB0_4: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; MIPS2-NEXT:    sc $1, 0($4)
+; MIPS2-NEXT:    beqz $1, $BB0_1
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.5: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:    jr $ra
+; MIPS2-NEXT:    nop
+;
 ; MIPSR6-LABEL: test_max_32:
 ; MIPSR6:       # %bb.0: # %entry
 ; MIPSR6-NEXT:    sync
@@ -251,6 +283,33 @@ define i32 @test_min_32(ptr nocapture %ptr, i32 signext %val) {
 ; MIPS-NEXT:    jr $ra
 ; MIPS-NEXT:    nop
 ;
+; MIPS2-LABEL: test_min_32:
+; MIPS2:       # %bb.0: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:  $BB1_1: # %entry
+; MIPS2-NEXT:    # =>This Inner Loop Header: Depth=1
+; MIPS2-NEXT:    ll $2, 0($4)
+; MIPS2-NEXT:    slt $3, $2, $5
+; MIPS2-NEXT:    move $1, $2
+; MIPS2-NEXT:    beqz $3, $BB1_3
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.2: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB1_1 Depth=1
+; MIPS2-NEXT:    j $BB1_4
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  $BB1_3: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB1_1 Depth=1
+; MIPS2-NEXT:    move $1, $5
+; MIPS2-NEXT:  $BB1_4: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB1_1 Depth=1
+; MIPS2-NEXT:    sc $1, 0($4)
+; MIPS2-NEXT:    beqz $1, $BB1_1
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.5: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:    jr $ra
+; MIPS2-NEXT:    nop
+;
 ; MIPSR6-LABEL: test_min_32:
 ; MIPSR6:       # %bb.0: # %entry
 ; MIPSR6-NEXT:    sync
@@ -471,6 +530,33 @@ define i32 @test_umax_32(ptr nocapture %ptr, i32 signext %val) {
 ; MIPS-NEXT:    jr $ra
 ; MIPS-NEXT:    nop
 ;
+; MIPS2-LABEL: test_umax_32:
+; MIPS2:       # %bb.0: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:  $BB2_1: # %entry
+; MIPS2-NEXT:    # =>This Inner Loop Header: Depth=1
+; MIPS2-NEXT:    ll $2, 0($4)
+; MIPS2-NEXT:    sltu $3, $2, $5
+; MIPS2-NEXT:    move $1, $5
+; MIPS2-NEXT:    beqz $3, $BB2_3
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.2: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB2_1 Depth=1
+; MIPS2-NEXT:    j $BB2_4
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  $BB2_3: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB2_1 Depth=1
+; MIPS2-NEXT:    move $1, $2
+; MIPS2-NEXT:  $BB2_4: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB2_1 Depth=1
+; MIPS2-NEXT:    sc $1, 0($4)
+; MIPS2-NEXT:    beqz $1, $BB2_1
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.5: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:    jr $ra
+; MIPS2-NEXT:    nop
+;
 ; MIPSR6-LABEL: test_umax_32:
 ; MIPSR6:       # %bb.0: # %entry
 ; MIPSR6-NEXT:    sync
@@ -691,6 +777,33 @@ define i32 @test_umin_32(ptr nocapture %ptr, i32 signext %val) {
 ; MIPS-NEXT:    jr $ra
 ; MIPS-NEXT:    nop
 ;
+; MIPS2-LABEL: test_umin_32:
+; MIPS2:       # %bb.0: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:  $BB3_1: # %entry
+; MIPS2-NEXT:    # =>This Inner Loop Header: Depth=1
+; MIPS2-NEXT:    ll $2, 0($4)
+; MIPS2-NEXT:    sltu $3, $2, $5
+; MIPS2-NEXT:    move $1, $2
+; MIPS2-NEXT:    beqz $3, $BB3_3
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.2: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; MIPS2-NEXT:    j $BB3_4
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  $BB3_3: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; MIPS2-NEXT:    move $1, $5
+; MIPS2-NEXT:  $BB3_4: # %entry
+; MIPS2-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; MIPS2-NEXT:    sc $1, 0($4)
+; MIPS2-NEXT:    beqz $1, $BB3_1
+; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  # %bb.5: # %entry
+; MIPS2-NEXT:    sync
+; MIPS2-NEXT:    jr $ra
+; MIPS2-NEXT:    nop
+;
 ; MIPSR6-LABEL: test_umin_32:
 ; MIPSR6:       # %bb.0: # %entry
 ; MIPSR6-NEXT:    sync
 | 
|  | ||
| NMBBI = BB.end(); | ||
| I->eraseFromParent(); | ||
|  | ||
| LivePhysRegs LiveRegs; | ||
| computeAndAddLiveIns(LiveRegs, *loopMBB); | ||
| if (!STI->hasMips4() && !STI->hasMips32()) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you instead check that you have loop1MBB and loop2MBB instead of checking the correlated conditions that create them
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied.
91e0e20    to
    c9db3ab      
    Compare
  
    | if (loop1MBB) | ||
| computeAndAddLiveIns(LiveRegs, *loop1MBB); | ||
| if (loop2MBB) | ||
| computeAndAddLiveIns(LiveRegs, *loop2MBB); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (loop1MBB) | |
| computeAndAddLiveIns(LiveRegs, *loop1MBB); | |
| if (loop2MBB) | |
| computeAndAddLiveIns(LiveRegs, *loop2MBB); | |
| if (loop1MBB) { | |
| assert(loop2MBB && "should have 2 loop blocks"); | |
| computeAndAddLiveIns(LiveRegs, *loop1MBB); | |
| computeAndAddLiveIns(LiveRegs, *loop2MBB); | |
| } | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied.
c9db3ab    to
    5907b15      
    Compare
  
    | 
 I don't see that change in the original PR? In any case, just deleting the failing tests isn't really a solution to the problem | 
| ; // mips2 does not test i16 and i8. | ||
| ; RUN: sed -e '/define.*@\(test_max_16\|test_umax_16\|test_min_16\|test_umin_16\)/,/^}$/d' \ | ||
| ; RUN: -e '/define.*@\(test_max_8\|test_umax_8\|test_min_8\|test_umin_8\)/,/^}$/d' %s > %t.filtered.ll | ||
| ; RUN: llc -mtriple=mipsel-elf -O0 -mcpu=mips2 -verify-machineinstrs %t.filtered.ll -o - | FileCheck %s --check-prefix=MIPS2 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not maintainable. Do not do this. Even if you aren't bothering to support the sub-dword case, you should have test coverage for the error cases
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I will be more rigorous in the future.
5907b15    to
    1ffe78b      
    Compare
  
    | 
 Thank you for your correction, I should not research the broken reasons in such a simple and crude way. | 
…for mips2 Modify instr movn/movz to mixture of beq, move, and sc. Because atomic-min-max.ll test broken on the expensive builder, I revert llvm#149983 and resubmit this PR. The broken reason: In i16/i8 function expandAtomicBinOpSubword, we use two successor after loop2MBB, one does not specify the second parameter, the other use BranchProbability::getOne() that means 100% probability. This is contradictory. And the second successor is also specified incorrectly. The changess: * llvm/lib/Target/Mips/MipsExpandPseudo.cpp: Change loop2MBB`s second successor to correct one and delete the second parameter BranchProbability::getOne(). * llvm/test/CodeGen/Mips/atomic-min-max.ll: Add -verify-machineinstrs option in RUN command; Modify i16 test and i8 test according to the changes. Fix llvm#145411.
1ffe78b    to
    e1c6251      
    Compare
  
    …for mips2 (llvm#159717) Modify instr movn/movz to mixture of beq, move, and sc. Because atomic-min-max.ll test broken on the expensive builder, I revert llvm#149983 and resubmit this PR. The broken reason: In i16/i8 function expandAtomicBinOpSubword, we use two successor after loop2MBB, one does not specify the second parameter, the other use BranchProbability::getOne() that means 100% probability. This is contradictory. And the second successor is also specified incorrectly. The changess: * llvm/lib/Target/Mips/MipsExpandPseudo.cpp: Change loop2MBB`s second successor to correct one and delete the second parameter BranchProbability::getOne(). * llvm/test/CodeGen/Mips/atomic-min-max.ll: Add -verify-machineinstrs option in RUN command; Modify i16 test and i8 test according to the changes. Fix llvm#145411. (cherry picked from commit 114b3b8)
…for mips2 (llvm#159717) Modify instr movn/movz to mixture of beq, move, and sc. Because atomic-min-max.ll test broken on the expensive builder, I revert llvm#149983 and resubmit this PR. The broken reason: In i16/i8 function expandAtomicBinOpSubword, we use two successor after loop2MBB, one does not specify the second parameter, the other use BranchProbability::getOne() that means 100% probability. This is contradictory. And the second successor is also specified incorrectly. The changess: * llvm/lib/Target/Mips/MipsExpandPseudo.cpp: Change loop2MBB`s second successor to correct one and delete the second parameter BranchProbability::getOne(). * llvm/test/CodeGen/Mips/atomic-min-max.ll: Add -verify-machineinstrs option in RUN command; Modify i16 test and i8 test according to the changes. Fix llvm#145411.
Modify instr movn/movz to mixture of beq, move, and sc.
Because atomic-min-max.ll test broken on the expensive builder, I revert
#149983 and resubmit this PR.
The broken reason:
In i16/i8 function expandAtomicBinOpSubword, we use two successor
after loop2MBB, one does not specify the second parameter, the other
use BranchProbability::getOne() that means 100% probability. This is
contradictory. And the second successor is also specified incorrectly.
The changess:
Change loop2MBB`s second successor to correct one and delete the
second parameter BranchProbability::getOne().
Add -verify-machineinstrs option in RUN command;
Modify i16 test and i8 test according to the changes.
Fix #145411.