Skip to content

Conversation

@AZero13
Copy link
Contributor

@AZero13 AZero13 commented Aug 15, 2025

As long as we don't use overflow flags, we should be good.

In fact, this is more likely to happen than having a comparison where you have x - y < 0 because LLVM tends to canonicalize this to x < y when possible, but it does not do the same with x + y < 0 -> x < -y.

…S(x,y)

As long as we don't use overflow flags, we should be good.

In fact, this is more likely to happen than having a comparison where you have x - y < 0 because LLVM tends to canonicalize this to x < y when possible, but it does not do the same with
x + y < 0 -> x < -y.
@llvmbot
Copy link
Member

llvmbot commented Aug 15, 2025

@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

As long as we don't use overflow flags, we should be good.

In fact, this is more likely to happen than having a comparison where you have x - y < 0 because LLVM tends to canonicalize this to x < y when possible, but it does not do the same with x + y < 0 -> x < -y.


Full diff: https://github.com/llvm/llvm-project/pull/153859.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+19-13)
  • (modified) llvm/test/CodeGen/AArch64/peephole-and-tst.ll (+27-13)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2072e48914ae6..aa6905e638bb9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25486,25 +25486,31 @@ static SDValue performCSELCombine(SDNode *N,
     }
   }
 
-  // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
-  // use overflow flags, to avoid the comparison with zero. In case of success,
-  // this also replaces the original SUB(x,y) with the newly created SUBS(x,y).
-  // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB
-  // nodes with their SUBS equivalent as is already done for other flag-setting
+  // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) or // CSEL a,
+  // b, cc, SUBS(ADD(x,y), 0) -> CSEL a, b, cc, ADDS(x,y) if cc doesn't use
+  // overflow flags, to avoid the comparison with zero. In case of success, this
+  // also replaces the original SUB(x,y) with the newly created SUBS(x,y). NOTE:
+  // Perhaps in the future use performFlagSettingCombine to replace SUB nodes
+  // with their SUBS equivalent as is already done for other flag-setting
   // operators, in which case doing the replacement here becomes redundant.
   if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) &&
       isNullConstant(Cond.getOperand(1))) {
-    SDValue Sub = Cond.getOperand(0);
+    SDValue SubOrAdd = Cond.getOperand(0);
     AArch64CC::CondCode CC =
         static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
-    if (Sub.getOpcode() == ISD::SUB &&
-        (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
-         CC == AArch64CC::PL)) {
+    if ((SubOrAdd.getOpcode() == ISD::SUB ||
+         SubOrAdd.getOpcode() == ISD::ADD) &&
+        ((CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
+          CC == AArch64CC::PL))) {
       SDLoc DL(N);
-      SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
-                                 Sub.getOperand(0), Sub.getOperand(1));
-      DCI.CombineTo(Sub.getNode(), Subs);
-      DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1));
+
+      SDValue Result =
+          DAG.getNode((SubOrAdd.getOpcode() == ISD::SUB ? AArch64ISD::SUBS
+                                                        : AArch64ISD::ADDS),
+                      DL, Cond->getVTList(), SubOrAdd.getOperand(0),
+                      SubOrAdd.getOperand(1));
+      DCI.CombineTo(SubOrAdd.getNode(), Result);
+      DCI.CombineTo(Cond.getNode(), Result, Result.getValue(1));
       return SDValue(N, 0);
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
index 3caac1d13495d..057c373967257 100644
--- a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
@@ -295,19 +295,33 @@ define i64 @test_and_4(i64 %x, i64 %y) {
 }
 
 define i64 @test_add(i64 %x, i64 %y) {
-; CHECK-LABEL: test_add:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    add x19, x0, #3
-; CHECK-NEXT:    mov x0, xzr
-; CHECK-NEXT:    bl callee
-; CHECK-NEXT:    cmp x19, #0
-; CHECK-NEXT:    csel x0, x19, x0, eq
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_add:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w19, -8
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    mov x19, x0
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    bl callee
+; CHECK-SD-NEXT:    adds x8, x19, #3
+; CHECK-SD-NEXT:    csel x0, x8, x0, eq
+; CHECK-SD-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_add:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w19, -8
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    add x19, x0, #3
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    bl callee
+; CHECK-GI-NEXT:    cmp x19, #0
+; CHECK-GI-NEXT:    csel x0, x19, x0, eq
+; CHECK-GI-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ret
   %a = add i64 %x, 3
   %b = call i64 @callee(i64 0)
   %c = icmp eq i64 %a, 0

@AZero13 AZero13 closed this Aug 26, 2025
@AZero13 AZero13 deleted the overflowsse branch August 26, 2025 20:21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants