
Conversation

@AZero13
Contributor

@AZero13 AZero13 commented Aug 14, 2025

This should be the peephole's job. Because ANDS sets the V flag to 0, signed comparisons with 0 are safe to replace with TST. Note this applies only to AArch64, because ANDS on ARM leaves the V flag unchanged.

Fixes: #154387
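
For context, the flag reasoning in assembly form (a sketch; register names are illustrative). TST is an alias of ANDS with the zero register as destination, so it sets N and Z from the AND result and sets C and V to 0, which is what makes the signed compare foldable:

        and     w8, w9, w8
        cmp     w8, #0
        b.gt    .Ltarget

becomes

        tst     w9, w8          // ANDS wzr, w9, w8: N/Z from the result, C = V = 0
        b.gt    .Ltarget        // GT is Z == 0 && N == V; with V = 0 this is Z == 0 && N == 0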

@llvmbot
Member

llvmbot commented Aug 14, 2025

@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

This should be the peephole's job. Because ANDS sets the V flag to 0, signed comparisons with 0 are safe to replace with TST. Note this applies only to AArch64, because ANDS on ARM leaves the V flag unchanged.


Full diff: https://github.com/llvm/llvm-project/pull/153608.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+75-1)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp (+20)
  • (added) llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir (+41)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a55f103bff385..1d909600413aa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1433,6 +1433,34 @@ static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
   case AArch64::SUBSXrx:
     return AArch64::SUBXrx;
+  case AArch64::ANDSWri:
+    return AArch64::ANDWri;
+  case AArch64::ANDSWrr:
+    return AArch64::ANDWrr;
+  case AArch64::ANDSWrs:
+    return AArch64::ANDWrs;
+  case AArch64::ANDSXri:
+    return AArch64::ANDXri;
+  case AArch64::ANDSXrr:
+    return AArch64::ANDXrr;
+  case AArch64::ANDSXrs:
+    return AArch64::ANDXrs;
+  case AArch64::BICSWrr:
+    return AArch64::BICWrr;
+  case AArch64::BICSXrr:
+    return AArch64::BICXrr;
+  case AArch64::BICSWrs:
+    return AArch64::BICWrs;
+  case AArch64::BICSXrs:
+    return AArch64::BICXrs;
+  case AArch64::SBCSXr:
+    return AArch64::SBCXr;
+  case AArch64::SBCSWr:
+    return AArch64::SBCWr;
+  case AArch64::ADCSXr:
+    return AArch64::ADCXr;
+  case AArch64::ADCSWr:
+    return AArch64::ADCWr;
   }
 }
 
@@ -1717,6 +1745,16 @@ static unsigned sForm(MachineInstr &Instr) {
   case AArch64::SUBSWri:
   case AArch64::SUBSXrr:
   case AArch64::SUBSXri:
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSXrr:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrs:
     return Instr.getOpcode();
 
   case AArch64::ADDWrr:
@@ -1747,6 +1785,22 @@ static unsigned sForm(MachineInstr &Instr) {
     return AArch64::ANDSWri;
   case AArch64::ANDXri:
     return AArch64::ANDSXri;
+  case AArch64::ANDWrr:
+    return AArch64::ANDSWrr;
+  case AArch64::ANDWrs:
+    return AArch64::ANDSWrs;
+  case AArch64::ANDXrr:
+    return AArch64::ANDSXrr;
+  case AArch64::ANDXrs:
+    return AArch64::ANDSXrs;
+  case AArch64::BICWrr:
+    return AArch64::BICSWrr;
+  case AArch64::BICXrr:
+    return AArch64::BICSXrr;
+  case AArch64::BICWrs:
+    return AArch64::BICSWrs;
+  case AArch64::BICXrs:
+    return AArch64::BICSXrs;
   }
 }
 
@@ -1884,6 +1938,26 @@ static bool isSUBSRegImm(unsigned Opcode) {
   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
 }
 
+
+static bool isANDOpcode(MachineInstr &MI) {
+  unsigned Opc = sForm(MI);
+  switch (Opc) {
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSXrr:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrs:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Check if CmpInstr can be substituted by MI.
 ///
 /// CmpInstr can be substituted:
@@ -1921,7 +1995,7 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
   // 1) MI and CmpInstr set N and V to the same value.
   // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
   //    signed overflow occurs, so CmpInstr could still be simplified away.
-  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
+  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
     return false;
 
   AccessKind AccessToCheck = AK_Write;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 4bd025da636ca..318346c2c8aec 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -87,6 +87,26 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
     return AArch64::ADDXri;
   case AArch64::ADDSWri:
     return AArch64::ADDWri;
+  case AArch64::ANDSWri:
+    return AArch64::ANDWri;
+  case AArch64::ANDSWrr:
+    return AArch64::ANDWrr;
+  case AArch64::ANDSWrs:
+    return AArch64::ANDWrs;
+  case AArch64::ANDSXri:
+    return AArch64::ANDXri;
+  case AArch64::ANDSXrr:
+    return AArch64::ANDXrr;
+  case AArch64::ANDSXrs:
+    return AArch64::ANDXrs;
+  case AArch64::BICSWrr:
+    return AArch64::BICWrr;
+  case AArch64::BICSXrr:
+    return AArch64::BICXrr;
+  case AArch64::BICSWrs:
+    return AArch64::BICWrs;
+  case AArch64::BICSXrs:
+    return AArch64::BICXrs;
   case AArch64::SBCSXr:
     return AArch64::SBCXr;
   case AArch64::SBCSWr:
diff --git a/llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir b/llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir
new file mode 100644
index 0000000000000..27a8f91f30a8b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir
@@ -0,0 +1,41 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s | FileCheck %s
+# CHECK: %1:gpr32common = ANDSWri {{.*}}
+# CHECK-NOT: $wzr = SUBSWri {{.*}}
+--- |
+  define i32 @test01() nounwind {
+  entry:
+    %0 = select i1 true, i32 1, i32 0
+    %1 = and i32 %0, 65535
+    %2 = icmp sgt i32 %1, 0
+    br i1 %2, label %if.then, label %if.end
+
+  if.then:                                      ; preds = %entry
+    ret i32 1
+
+  if.end:                                       ; preds = %entry
+    ret i32 0
+  }
+...
+---
+name:            test01
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr32common }
+body:             |
+  bb.0.entry:
+    successors: %bb.2.if.end, %bb.1.if.then
+
+    %0 = MOVi32imm 1
+    %1 = ANDWri killed %1, 15
+    $wzr = SUBSWri killed %1, 0, 0, implicit-def $nzcv
+    Bcc 12, %bb.2.if.end, implicit $nzcv
+
+  bb.1.if.then:
+    $w0 = MOVi32imm 1
+    RET_ReallyLR implicit $w0
+
+  bb.2.if.end:
+    $w0 = MOVi32imm 0
+    RET_ReallyLR implicit $w0
+
+...

@github-actions

github-actions bot commented Aug 14, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@AZero13 AZero13 force-pushed the ede branch 3 times, most recently from c1453eb to 38d4b65 Compare August 15, 2025 22:56
@AZero13
Contributor Author

AZero13 commented Aug 18, 2025

@davemgreen what do you think about this?

@AZero13
Contributor Author

AZero13 commented Aug 19, 2025

@MacDue Ping?

@MacDue
Member

MacDue commented Aug 21, 2025

I think the utility of your change would be more apparent if you pre-committed some tests. IIUC this is folding things like:

        and     w8, w9, w8
        cmp     w8, #0

to

        tst     w9, w8

?

@AZero13
Contributor Author

AZero13 commented Aug 21, 2025

> I think the utility of your change would be more apparent if you pre-committed some tests. IIUC this is folding things like:
>
>         and     w8, w9, w8
>         cmp     w8, #0
>
> to
>
>         tst     w9, w8
>
> ?

Yes, exactly. That's the point.

@AZero13
Contributor Author

AZero13 commented Aug 22, 2025

@MacDue Done!

@AZero13 AZero13 force-pushed the ede branch 2 times, most recently from b080125 to 4add981 Compare August 23, 2025 00:21
@AZero13
Contributor Author

AZero13 commented Sep 26, 2025

@davemgreen ping?

@AZero13
Contributor Author

AZero13 commented Nov 4, 2025

@davemgreen We cannot fold all cases in SelectionDAG/GISel because freeze instructions can sit between the AND and the compare.
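
Concretely, the problematic shape from #154387 is roughly this (a hypothetical reduction; names are illustrative). The freeze between the and and the icmp blocks the pre-isel combines, so only the MI-level peephole still sees the fold:

    define i1 @src(i32 %x, i32 %y) {
      %a = and i32 %x, %y
      %f = freeze i32 %a          ; blocks the and+icmp combine in SelectionDAG/GISel
      %c = icmp sgt i32 %f, 0     ; signed compare with 0
      ret i1 %c
    }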

@AZero13 AZero13 force-pushed the ede branch 2 times, most recently from c34dec4 to 1c6a37b Compare November 7, 2025 20:01
@AZero13
Contributor Author

AZero13 commented Nov 7, 2025

@MacDue I addressed the issues. Thank you!

This should be the peephole's job. Because ANDS sets the V flag to 0, signed comparisons with 0 are safe to replace with TST. Note this applies only to AArch64, because ANDS on ARM leaves the V flag unchanged.

Fixes: llvm#154387
Member

@MacDue MacDue left a comment


LGTM

@AZero13
Contributor Author

AZero13 commented Nov 10, 2025

Thank you @MacDue can you please merge?

@AZero13 AZero13 requested a review from MacDue November 10, 2025 16:47
@MacDue MacDue merged commit 7b12a08 into llvm:main Nov 10, 2025
10 checks passed
@AZero13 AZero13 deleted the ede branch November 10, 2025 22:32
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

static bool isANDOpcode(MachineInstr &MI) {
Collaborator


const MachineInstr &

Collaborator


Oh, I guess sForm requires a non-const reference, but that also seems wrong.
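
For reference, a minimal sketch of the const-correct shape being suggested (this assumes sForm is also changed to take a const reference, which, per the comment above, it currently does not):

    // sForm only reads Instr.getOpcode(), so a const reference would suffice.
    static unsigned sForm(const MachineInstr &Instr);

    static bool isANDOpcode(const MachineInstr &MI) {
      switch (sForm(MI)) {
      case AArch64::ANDSWri:
      case AArch64::ANDSWrr:
      case AArch64::ANDSWrs:
      case AArch64::ANDSXri:
      case AArch64::ANDSXrr:
      case AArch64::ANDSXrs:
      case AArch64::BICSWrr:
      case AArch64::BICSXrr:
      case AArch64::BICSWrs:
      case AArch64::BICSXrs:
        return true;
      default:
        return false;
      }
    }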


Development

Successfully merging this pull request may close these issues.

[AArch64] AND optimization does not work if there is a freeze between AND and the cmp
