-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64] Use CNEG for absolute difference patterns. #151177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Use CNEG for absolute difference patterns. #151177
Conversation
The current code generated for absolute difference patterns (a > b ? a - b : b - a) typically consists of sequences of:
```
sub w8, w1, w0
subs w9, w0, w1
csel w0, w9, w8, hi
```
The first sub is redundant if the csel is replaced by a cneg:
```
subs w8, w0, w1
cneg w0, w8, ls
```
This is achieved by canonicalising
```
select(cc, sub(lhs, rhs), sub(rhs, lhs)) -> select(cc, sub(lhs, rhs), neg(sub(lhs, rhs)))
```
and
```
select(cc, sub(rhs, lhs), sub(lhs, rhs)) -> select(cc, neg(sub(lhs, rhs)), sub(lhs, rhs))
```
as the second forms can already be matched.
|
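@llvm/pr-subscribers-backend-aarch64
Author: Ricardo Jesus (rj-jesus)
Changes
The current code generated for absolute difference patterns (a > b ? a - b : b - a) typically consists of sequences of `sub w8, w1, w0; subs w9, w0, w1; csel w0, w9, w8, hi`. The first sub is redundant if the csel is replaced by a cneg: `subs w8, w0, w1; cneg w0, w8, ls`. This is achieved by canonicalising `select(cc, sub(lhs, rhs), sub(rhs, lhs))` -> `select(cc, sub(lhs, rhs), neg(sub(lhs, rhs)))` and `select(cc, sub(rhs, lhs), sub(lhs, rhs))` -> `select(cc, neg(sub(lhs, rhs)), sub(lhs, rhs))`, as the second forms can already be matched. This helps with some of the patterns in #118413.
Patch is 25.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151177.diff
6 Files Affected: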
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b49754ee7e1f..06678c2fa3089 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11386,6 +11386,22 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
return DAG.getNode(ISD::AND, DL, VT, LHS, Shift);
}
+ // Canonicalise absolute difference patterns:
+ // select(cc, sub(lhs, rhs), sub(rhs, lhs)) ->
+ // select(cc, sub(lhs, rhs), neg(sub(lhs, rhs)))
+ //
+ // select(cc, sub(rhs, lhs), sub(lhs, rhs)) ->
+ // select(cc, neg(sub(lhs, rhs)), sub(lhs, rhs))
+ // The second forms can be matched into subs+cneg.
+ if (TVal.getOpcode() == ISD::SUB && FVal.getOpcode() == ISD::SUB) {
+ if (TVal.getOperand(0) == LHS && TVal.getOperand(1) == RHS &&
+ FVal.getOperand(0) == RHS && FVal.getOperand(1) == LHS)
+ FVal = DAG.getNegative(TVal, DL, TVal.getValueType());
+ else if (TVal.getOperand(0) == RHS && TVal.getOperand(1) == LHS &&
+ FVal.getOperand(0) == LHS && FVal.getOperand(1) == RHS)
+ TVal = DAG.getNegative(FVal, DL, FVal.getValueType());
+ }
+
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 432ffc30eec5e..75247823ee793 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -77,10 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w9, w1, w8
-; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w8, w8, w9, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -111,10 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -129,10 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sub w9, w8, w0
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w8, w8, w9, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -146,10 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -163,10 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, gt
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -180,10 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, gt
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -359,9 +347,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w8, w9, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -373,9 +360,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, lt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index ed1e6077948ee..bbdb116851710 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -73,9 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,9 +103,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -120,9 +118,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,9 +132,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,9 +146,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,9 +160,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -248,9 +242,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -261,9 +254,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -324,9 +316,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -338,9 +329,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -572,9 +562,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -586,9 +575,8 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 8fb106e92866e..d07f099a536ab 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -77,10 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w9, w1, w8
-; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w8, w8, w9, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -111,10 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -129,10 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: sub w9, w8, w0
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w8, w8, w9, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -146,10 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -163,10 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, hi
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -180,10 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, hi
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -363,9 +351,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w8, w9, hs
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%cmp = icmp uge i32 %a, %b
%ab = sub i32 %a, %b
@@ -377,9 +364,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, lo
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 4585de96c848f..1045ee20dc734 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -73,9 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w0, w8, w9, hi
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -104,9 +103,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -120,9 +118,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w0, w8, w9, hi
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -135,9 +132,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -150,9 +146,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -165,9 +160,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -252,9 +246,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -265,9 +258,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
@@ -330,9 +322,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ult i32 %a, %b
%ab = sub i32 %a, %b
@@ -344,9 +335,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = sub i64 %a, %b
@@ -437,9 +427,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -451,9 +440,8 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index bbdce7c6e933b..15c1dffae749e 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -13,10 +13,9 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w1, ...
[truncated]
|
…s. (#152130) As a follow-up to #151177, when lowering SELECT_CC nodes of absolute difference patterns, drop poison-generating flags from the negated operand to avoid inadvertently propagating poison. As discussed in the PR above, I didn't find practical issues with the current code, but it seems safer to do this preemptively.
This patch avoids a comparison against zero when lowering abs(sub(a, b)) patterns, instead reusing the condition codes generated by a subs of the operands directly. For example, currently:
```
sxtb w8, w0
sub w8, w8, w1, sxtb
cmp w8, #0
cneg w0, w8, mi
```
becomes:
```
sxtb w8, w0
subs w8, w8, w1, sxtb
cneg w0, w8, mi
```
Together with #151177, this should handle the remaining patterns in #118413.
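The current code generated for absolute difference patterns (a > b ? a - b : b - a) typically consists of sequences of:
```
sub w8, w1, w0
subs w9, w0, w1
csel w0, w9, w8, hi
```
The first sub is redundant if the csel is replaced by a cneg:
```
subs w8, w0, w1
cneg w0, w8, ls
```
This is achieved by canonicalising
```
select(cc, sub(lhs, rhs), sub(rhs, lhs)) -> select(cc, sub(lhs, rhs), neg(sub(lhs, rhs)))
```
and
```
select(cc, sub(rhs, lhs), sub(lhs, rhs)) -> select(cc, neg(sub(lhs, rhs)), sub(lhs, rhs))
```
as the second forms can already be matched.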
This helps with some of the patterns in #118413.